go.mod: update osbuild/images to v0.74.0

Gianluca Zuccarelli 2024-08-06 15:20:12 +01:00 committed by Sanne Raymaekers
parent 3789ff4ce8
commit c9972f7da8
327 changed files with 8341 additions and 12785 deletions


@@ -70,6 +70,8 @@ type (
}
)
const PaxSchilyXattr = "SCHILY.xattr."
const (
tarExt = "tar"
solaris = "solaris"
@@ -169,10 +171,17 @@ func DetectCompression(source []byte) Compression {
}
// DecompressStream decompresses the archive and returns a ReadCloser with the decompressed archive.
func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
func DecompressStream(archive io.Reader) (_ io.ReadCloser, Err error) {
p := pools.BufioReader32KPool
buf := p.Get(archive)
bs, err := buf.Peek(10)
defer func() {
if Err != nil {
p.Put(buf)
}
}()
if err != nil && err != io.EOF {
// Note: we'll ignore any io.EOF error because there are some odd
// cases where the layer.tar file will be empty (zero bytes) and
@@ -189,6 +198,12 @@ func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
readBufWrapper := p.NewReadCloserWrapper(buf, buf)
return readBufWrapper, nil
case Gzip:
cleanup := func() {
p.Put(buf)
}
if rc, canUse := tryProcFilter([]string{"pigz", "-d"}, buf, cleanup); canUse {
return rc, nil
}
gzReader, err := gzip.NewReader(buf)
if err != nil {
return nil, err
@@ -207,6 +222,12 @@ func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
readBufWrapper := p.NewReadCloserWrapper(buf, xzReader)
return readBufWrapper, nil
case Zstd:
cleanup := func() {
p.Put(buf)
}
if rc, canUse := tryProcFilter([]string{"zstd", "-d"}, buf, cleanup); canUse {
return rc, nil
}
return zstdReader(buf)
default:
return nil, fmt.Errorf("unsupported compression format %s", (&compression).Extension())
@@ -214,9 +235,16 @@ func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
}
// CompressStream compresses the dest with the specified compression algorithm.
func CompressStream(dest io.Writer, compression Compression) (io.WriteCloser, error) {
func CompressStream(dest io.Writer, compression Compression) (_ io.WriteCloser, Err error) {
p := pools.BufioWriter32KPool
buf := p.Get(dest)
defer func() {
if Err != nil {
p.Put(buf)
}
}()
switch compression {
case Uncompressed:
writeBufWrapper := p.NewWriteCloserWrapper(buf, buf)
@@ -391,11 +419,11 @@ func FileInfoHeader(name string, fi os.FileInfo, link string) (*tar.Header, erro
return hdr, nil
}
// ReadSecurityXattrToTarHeader reads security.capability, security.ima
// readSecurityXattrToTarHeader reads security.capability, security.ima
// xattrs from filesystem to a tar header
func ReadSecurityXattrToTarHeader(path string, hdr *tar.Header) error {
if hdr.Xattrs == nil {
hdr.Xattrs = make(map[string]string)
func readSecurityXattrToTarHeader(path string, hdr *tar.Header) error {
if hdr.PAXRecords == nil {
hdr.PAXRecords = make(map[string]string)
}
for _, xattr := range []string{"security.capability", "security.ima"} {
capability, err := system.Lgetxattr(path, xattr)
@@ -403,14 +431,14 @@ func ReadSecurityXattrToTarHeader(path string, hdr *tar.Header) error {
return fmt.Errorf("failed to read %q attribute from %q: %w", xattr, path, err)
}
if capability != nil {
hdr.Xattrs[xattr] = string(capability)
hdr.PAXRecords[PaxSchilyXattr+xattr] = string(capability)
}
}
return nil
}
// ReadUserXattrToTarHeader reads user.* xattr from filesystem to a tar header
func ReadUserXattrToTarHeader(path string, hdr *tar.Header) error {
// readUserXattrToTarHeader reads user.* xattr from filesystem to a tar header
func readUserXattrToTarHeader(path string, hdr *tar.Header) error {
xattrs, err := system.Llistxattr(path)
if err != nil && !errors.Is(err, system.EOPNOTSUPP) && err != system.ErrNotSupportedPlatform {
return err
@@ -425,10 +453,10 @@ func ReadUserXattrToTarHeader(path string, hdr *tar.Header) error {
}
return err
}
if hdr.Xattrs == nil {
hdr.Xattrs = make(map[string]string)
if hdr.PAXRecords == nil {
hdr.PAXRecords = make(map[string]string)
}
hdr.Xattrs[key] = string(value)
hdr.PAXRecords[PaxSchilyXattr+key] = string(value)
}
}
return nil
@@ -516,10 +544,10 @@ func (ta *tarAppender) addTarFile(path, name string) error {
if err != nil {
return err
}
if err := ReadSecurityXattrToTarHeader(path, hdr); err != nil {
if err := readSecurityXattrToTarHeader(path, hdr); err != nil {
return err
}
if err := ReadUserXattrToTarHeader(path, hdr); err != nil {
if err := readUserXattrToTarHeader(path, hdr); err != nil {
return err
}
if err := ReadFileFlagsToTarHeader(path, hdr); err != nil {
@@ -642,7 +670,7 @@ func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, L
}
}
case tar.TypeReg, tar.TypeRegA:
case tar.TypeReg:
// Source is regular file. We use system.OpenFileSequential to use sequential
// file access to avoid depleting the standby list on Windows.
// On Linux, this equates to a regular os.OpenFile
@@ -701,8 +729,11 @@ func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, L
}
if forceMask != nil && (hdr.Typeflag != tar.TypeSymlink || runtime.GOOS == "darwin") {
value := fmt.Sprintf("%d:%d:0%o", hdr.Uid, hdr.Gid, hdrInfo.Mode()&0o7777)
if err := system.Lsetxattr(path, idtools.ContainersOverrideXattr, []byte(value), 0); err != nil {
value := idtools.Stat{
IDs: idtools.IDPair{UID: hdr.Uid, GID: hdr.Gid},
Mode: hdrInfo.Mode() & 0o7777,
}
if err := idtools.SetContainersOverrideXattr(path, value); err != nil {
return err
}
}
@@ -753,11 +784,15 @@ func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, L
}
var errs []string
for key, value := range hdr.Xattrs {
if _, found := xattrsToIgnore[key]; found {
for key, value := range hdr.PAXRecords {
xattrKey, ok := strings.CutPrefix(key, PaxSchilyXattr)
if !ok {
continue
}
if err := system.Lsetxattr(path, key, []byte(value), 0); err != nil {
if _, found := xattrsToIgnore[xattrKey]; found {
continue
}
if err := system.Lsetxattr(path, xattrKey, []byte(value), 0); err != nil {
if errors.Is(err, syscall.ENOTSUP) || (inUserns && errors.Is(err, syscall.EPERM)) {
// We ignore errors here because not all graphdrivers support
// xattrs *cough* old versions of AUFS *cough*. However only
@@ -1113,9 +1148,14 @@ loop:
}
}
if options.ForceMask != nil && rootHdr != nil {
value := fmt.Sprintf("%d:%d:0%o", rootHdr.Uid, rootHdr.Gid, rootHdr.Mode)
if err := system.Lsetxattr(dest, idtools.ContainersOverrideXattr, []byte(value), 0); err != nil {
if options.ForceMask != nil {
value := idtools.Stat{Mode: 0o755}
if rootHdr != nil {
value.IDs.UID = rootHdr.Uid
value.IDs.GID = rootHdr.Gid
value.Mode = os.FileMode(rootHdr.Mode)
}
if err := idtools.SetContainersOverrideXattr(dest, value); err != nil {
return err
}
}
@@ -1337,7 +1377,7 @@ func remapIDs(readIDMappings, writeIDMappings *idtools.IDMappings, chownOpts *id
}
} else if runtime.GOOS == darwin {
uid, gid = hdr.Uid, hdr.Gid
if xstat, ok := hdr.Xattrs[idtools.ContainersOverrideXattr]; ok {
if xstat, ok := hdr.PAXRecords[PaxSchilyXattr+idtools.ContainersOverrideXattr]; ok {
attrs := strings.Split(string(xstat), ":")
if len(attrs) == 3 {
val, err := strconv.ParseUint(attrs[0], 10, 32)
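For context on the hdr.Xattrs → hdr.PAXRecords migration above: Go's archive/tar deprecated Header.Xattrs in favor of Header.PAXRecords, where each extended attribute is stored under a key carrying the Schily PAX vendor prefix. A minimal, self-contained round-trip sketch of that convention (not part of the patch; the prefix constant is inlined here, and the xattr value is a printable placeholder where real security.capability values are binary):

package main

import (
	"archive/tar"
	"bytes"
	"fmt"
)

const paxSchilyXattr = "SCHILY.xattr." // same value as archive.PaxSchilyXattr above

func main() {
	var buf bytes.Buffer
	tw := tar.NewWriter(&buf)
	hdr := &tar.Header{
		Name:   "etc/motd",
		Mode:   0o644,
		Format: tar.FormatPAX, // PAX records are only emitted in the PAX format
		PAXRecords: map[string]string{
			paxSchilyXattr + "user.comment": "hello",
		},
	}
	if err := tw.WriteHeader(hdr); err != nil {
		panic(err)
	}
	tw.Close()

	tr := tar.NewReader(&buf)
	got, err := tr.Next()
	if err != nil {
		panic(err)
	}
	// The reader surfaces the xattr under the same SCHILY.xattr.-prefixed key.
	fmt.Println(got.PAXRecords[paxSchilyXattr+"user.comment"]) // hello
}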


@@ -1,5 +1,5 @@
//go:build freebsd || darwin
// +build freebsd darwin
//go:build netbsd || freebsd || darwin
// +build netbsd freebsd darwin
package archive


@@ -48,8 +48,8 @@ func (o overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi
return nil, err
}
if len(opaque) == 1 && opaque[0] == 'y' {
if hdr.Xattrs != nil {
delete(hdr.Xattrs, getOverlayOpaqueXattrName())
if hdr.PAXRecords != nil {
delete(hdr.PAXRecords, PaxSchilyXattr+getOverlayOpaqueXattrName())
}
// If there are no lower layers, then it can't have been deleted in this layer.
if len(o.rolayers) == 0 {


@@ -316,7 +316,11 @@ func parseDirent(buf []byte, names []nameIno) (consumed int, newnames []nameIno)
// with respect to the parent layers
func OverlayChanges(layers []string, rw string) ([]Change, error) {
dc := func(root, path string, fi os.FileInfo) (string, error) {
return overlayDeletedFile(layers, root, path, fi)
r, err := overlayDeletedFile(layers, root, path, fi)
if err != nil {
return "", fmt.Errorf("overlay deleted file query: %w", err)
}
return r, nil
}
return changes(layers, rw, dc, nil, overlayLowerContainsWhiteout)
}
@@ -351,7 +355,7 @@ func overlayDeletedFile(layers []string, root, path string, fi os.FileInfo) (str
// If the directory isn't marked as opaque, then it's just a normal directory.
opaque, err := system.Lgetxattr(filepath.Join(root, path), getOverlayOpaqueXattrName())
if err != nil {
return "", err
return "", fmt.Errorf("failed querying overlay opaque xattr: %w", err)
}
if len(opaque) != 1 || opaque[0] != 'y' {
return "", err


@@ -31,9 +31,9 @@ func statDifferent(oldStat *system.StatT, oldInfo *FileInfo, newStat *system.Sta
ownerChanged ||
oldStat.Rdev() != newStat.Rdev() ||
oldStat.Flags() != newStat.Flags() ||
!sameFsTimeSpec(oldStat.Mtim(), newStat.Mtim()) ||
// Don't look at size for dirs, it's not a good measure of change
(oldStat.Mode()&unix.S_IFDIR != unix.S_IFDIR &&
(!sameFsTimeSpec(oldStat.Mtim(), newStat.Mtim()) || (oldStat.Size() != newStat.Size()))) {
((oldStat.Mode()&unix.S_IFDIR != unix.S_IFDIR) && (oldStat.Size() != newStat.Size())) {
return true
}
return false


@@ -0,0 +1,73 @@
package archive
import (
"bytes"
"fmt"
"io"
"os/exec"
"strings"
"sync"
)
var filterPath sync.Map
func getFilterPath(name string) string {
path, ok := filterPath.Load(name)
if ok {
return path.(string)
}
path, err := exec.LookPath(name)
if err != nil {
path = ""
}
filterPath.Store(name, path)
return path.(string)
}
type errorRecordingReader struct {
r io.Reader
err error
}
func (r *errorRecordingReader) Read(p []byte) (int, error) {
n, err := r.r.Read(p)
if r.err == nil && err != io.EOF {
r.err = err
}
return n, err
}
// tryProcFilter tries to run the command specified in args, passing input to its stdin and returning its stdout.
// cleanup() is a caller provided function that will be called when the command finishes running, regardless of
// whether it succeeds or fails.
// If the command is not found, it returns (nil, false) and the cleanup function is not called.
func tryProcFilter(args []string, input io.Reader, cleanup func()) (io.ReadCloser, bool) {
path := getFilterPath(args[0])
if path == "" {
return nil, false
}
var stderrBuf bytes.Buffer
inputWithError := &errorRecordingReader{r: input}
r, w := io.Pipe()
cmd := exec.Command(path, args[1:]...)
cmd.Stdin = inputWithError
cmd.Stdout = w
cmd.Stderr = &stderrBuf
go func() {
err := cmd.Run()
// if there is an error reading from input, prefer to return that error
if inputWithError.err != nil {
err = inputWithError.err
} else if err != nil && stderrBuf.Len() > 0 {
err = fmt.Errorf("%s: %w", strings.TrimRight(stderrBuf.String(), "\n"), err)
}
w.CloseWithError(err) // CloseWithErr(nil) == Close()
cleanup()
}()
return r, true
}
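A sketch of the intended call pattern for tryProcFilter (hypothetical gunzip helper; whether pigz is actually used depends on it being found in PATH, and imports of io and compress/gzip are assumed):

// gunzip decompresses r, preferring an external pigz process when available
// and falling back to in-process gzip otherwise.
func gunzip(r io.Reader) (io.ReadCloser, error) {
	// The cleanup callback is a no-op here; DecompressStream above uses it
	// to return its pooled bufio.Reader once the filter process exits.
	if rc, canUse := tryProcFilter([]string{"pigz", "-d"}, r, func() {}); canUse {
		return rc, nil
	}
	return gzip.NewReader(r)
}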


@@ -10,9 +10,11 @@ func invokeUnpack(decompressedArchive io.Reader,
dest string,
options *archive.TarOptions, root string,
) error {
_ = root // Restricting the operation to this root is not implemented on macOS
return archive.Unpack(decompressedArchive, dest, options)
}
func invokePack(srcPath string, options *archive.TarOptions, root string) (io.ReadCloser, error) {
_ = root // Restricting the operation to this root is not implemented on macOS
return archive.TarWithOptions(srcPath, options)
}


@@ -107,12 +107,15 @@ func invokeUnpack(decompressedArchive io.Reader, dest string, options *archive.T
w.Close()
if err := cmd.Wait(); err != nil {
errorOut := fmt.Errorf("unpacking failed (error: %w; output: %s)", err, output)
// when `xz -d -c -q | storage-untar ...` fails on the storage-untar side,
// we need to exhaust `xz`'s output, otherwise the `xz` side will be
// blocked on the write pipe forever
io.Copy(io.Discard, decompressedArchive)
if _, err := io.Copy(io.Discard, decompressedArchive); err != nil {
return fmt.Errorf("%w\nexhausting input failed (error: %w)", errorOut, err)
}
return fmt.Errorf("processing tar file(%s): %w", output, err)
return errorOut
}
return nil
}


@@ -19,10 +19,13 @@ import (
// Old root is removed after the call to pivot_root so it is no longer available under the new root.
// This is similar to how libcontainer sets up a container's rootfs
func chroot(path string) (err error) {
caps, err := capability.NewPid(0)
caps, err := capability.NewPid2(0)
if err != nil {
return err
}
if err := caps.Load(); err != nil {
return err
}
// initialize nss libraries in Glibc so that the dynamic libraries are loaded in the host
// environment not in the chroot from untrusted files.


@@ -40,11 +40,13 @@ func applyLayer() {
}
// We need to be able to set any perms
oldmask, err := system.Umask(0)
defer system.Umask(oldmask)
oldMask, err := system.Umask(0)
if err != nil {
fatal(err)
}
defer func() {
_, _ = system.Umask(oldMask) // Ignore err. This can only fail with ErrNotSupportedPlatform, in which case we would have failed above.
}()
if err := json.Unmarshal([]byte(os.Getenv("OPT")), &options); err != nil {
fatal(err)


@@ -2,10 +2,15 @@ package chunked
import (
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"github.com/docker/go-units"
)
const bloomFilterMaxLength = 100 * units.MB // max size for bloom filter
type bloomFilter struct {
bitArray []uint64
k uint32
@@ -79,6 +84,10 @@ func readBloomFilter(reader io.Reader) (*bloomFilter, error) {
if err := binary.Read(reader, binary.LittleEndian, &k); err != nil {
return nil, err
}
// sanity check
if bloomFilterLen > bloomFilterMaxLength {
return nil, fmt.Errorf("bloom filter length %d exceeds max length %d", bloomFilterLen, bloomFilterMaxLength)
}
bloomFilterArray := make([]uint64, bloomFilterLen)
if err := binary.Read(reader, binary.LittleEndian, &bloomFilterArray); err != nil {
return nil, err
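bloomFilterLen is read from untrusted cache data and used directly as an allocation size, so the new bound rejects a hostile length before make() can allocate gigabytes. The same defensive pattern in isolation, as a sketch (hypothetical readUint64Slice helper; binary.Read accepts a pointer to a slice of fixed-size values, as in the code above):

// readUint64Slice reads a length-prefixed []uint64, rejecting lengths above
// maxLen so corrupt or hostile input cannot force an enormous allocation.
func readUint64Slice(r io.Reader, maxLen uint64) ([]uint64, error) {
	var n uint64
	if err := binary.Read(r, binary.LittleEndian, &n); err != nil {
		return nil, err
	}
	if n > maxLen {
		return nil, fmt.Errorf("declared length %d exceeds maximum %d", n, maxLen)
	}
	out := make([]uint64, n)
	if err := binary.Read(r, binary.LittleEndian, &out); err != nil {
		return nil, err
	}
	return out, nil
}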


@@ -18,6 +18,7 @@ import (
graphdriver "github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/ioutils"
"github.com/docker/go-units"
jsoniter "github.com/json-iterator/go"
digest "github.com/opencontainers/go-digest"
"github.com/sirupsen/logrus"
@@ -34,6 +35,8 @@ const (
// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
bloomFilterScale = 10 // how much bigger is the bloom filter than the number of entries
bloomFilterHashes = 3 // number of hash functions for the bloom filter
maxTagsLen = 100 * units.MB // max size for tags len
)
type cacheFile struct {
@@ -77,7 +80,9 @@ var (
func (c *layer) release() {
runtime.SetFinalizer(c, nil)
if c.mmapBuffer != nil {
unix.Munmap(c.mmapBuffer)
if err := unix.Munmap(c.mmapBuffer); err != nil {
logrus.Warnf("Error Munmap: layer %q: %v", c.id, err)
}
}
}
@@ -189,7 +194,9 @@ func (c *layersCache) loadLayerCache(layerID string) (_ *layer, errRet error) {
}
defer func() {
if errRet != nil && mmapBuffer != nil {
unix.Munmap(mmapBuffer)
if err := unix.Munmap(mmapBuffer); err != nil {
logrus.Warnf("Error Munmap: layer %q: %v", layerID, err)
}
}
}()
cacheFile, err := readCacheFileFromMemory(buffer)
@@ -280,6 +287,13 @@ func (c *layersCache) load() error {
newLayers = append(newLayers, l)
continue
}
if r.ReadOnly {
// if the layer is coming from a read-only store, do not attempt
// to write to it.
continue
}
// the cache file is either not present or broken. Try to generate it from the TOC.
l, err = c.createCacheFileFromTOC(r.ID)
if err != nil {
@@ -635,6 +649,14 @@ func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) {
if err := binary.Read(bigData, binary.LittleEndian, &fnamesLen); err != nil {
return nil, err
}
if tagsLen > maxTagsLen {
return nil, fmt.Errorf("tags len %d exceeds the maximum allowed size %d", tagsLen, maxTagsLen)
}
if digestLen > tagLen {
return nil, fmt.Errorf("digest len %d exceeds the tag len %d", digestLen, tagLen)
}
tags := make([]byte, tagsLen)
if _, err := bigData.Read(tags); err != nil {
return nil, err
@@ -643,6 +665,10 @@ func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) {
// retrieve the unread part of the buffer.
remaining := bigDataBuffer[len(bigDataBuffer)-bigData.Len():]
if vdataLen >= uint64(len(remaining)) {
return nil, fmt.Errorf("vdata len %d exceeds the remaining buffer size %d", vdataLen, len(remaining))
}
vdata := remaining[:vdataLen]
fnames := remaining[vdataLen:]
@@ -901,7 +927,7 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) {
s := iter.ReadString()
d, err := digest.Parse(s)
if err != nil {
return nil, fmt.Errorf("Invalid tarSplitDigest %q: %w", s, err)
return nil, fmt.Errorf("invalid tarSplitDigest %q: %w", s, err)
}
toc.TarSplitDigest = d


@@ -5,13 +5,16 @@ import (
"errors"
"fmt"
"io"
"maps"
"strconv"
"time"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/klauspost/compress/zstd"
"github.com/klauspost/pgzip"
digest "github.com/opencontainers/go-digest"
"github.com/vbatts/tar-split/archive/tar"
expMaps "golang.org/x/exp/maps"
)
var typesToTar = map[string]byte{
@@ -209,20 +212,162 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
}
decodedTarSplit := []byte{}
if tarSplitChunk.Offset > 0 {
if toc.TarSplitDigest != "" {
if tarSplitChunk.Offset <= 0 {
return nil, nil, nil, 0, fmt.Errorf("TOC requires a tar-split, but the %s annotation does not describe a position", internal.TarSplitInfoKey)
}
tarSplit, err := readBlob(tarSplitChunk.Length)
if err != nil {
return nil, nil, nil, 0, err
}
decodedTarSplit, err = decodeAndValidateBlob(tarSplit, tarSplitLengthUncompressed, toc.TarSplitDigest.String())
if err != nil {
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing tar-split: %w", err)
}
// We use the TOC for creating on-disk files, but the tar-split for creating metadata
// when exporting the layer contents. Ensure the two match, otherwise local inspection of a container
// might be misleading about the exported contents.
if err := ensureTOCMatchesTarSplit(toc, decodedTarSplit); err != nil {
return nil, nil, nil, 0, fmt.Errorf("tar-split and TOC data is inconsistent: %w", err)
}
} else if tarSplitChunk.Offset > 0 {
// We must ignore the tar-split when the digest is not present in the TOC, because we can't authenticate it.
//
// But if we asked for the chunk, now we must consume the data to not block the producer.
// Ideally the GetBlobAt API should be changed so that this is not necessary.
_, err := readBlob(tarSplitChunk.Length)
if err != nil {
return nil, nil, nil, 0, err
}
}
return decodedBlob, toc, decodedTarSplit, int64(manifestChunk.Offset), err
}
// ensureTOCMatchesTarSplit validates that toc and tarSplit contain _exactly_ the same entries.
func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
pendingFiles := map[string]*internal.FileMetadata{} // Name -> an entry in toc.Entries
for i := range toc.Entries {
e := &toc.Entries[i]
if e.Type != internal.TypeChunk {
if _, ok := pendingFiles[e.Name]; ok {
return fmt.Errorf("TOC contains duplicate entries for path %q", e.Name)
}
pendingFiles[e.Name] = e
}
}
if err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error {
e, ok := pendingFiles[hdr.Name]
if !ok {
return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name)
}
delete(pendingFiles, hdr.Name)
expected, err := internal.NewFileMetadata(hdr)
if err != nil {
return fmt.Errorf("determining expected metadata for %q: %w", hdr.Name, err)
}
if err := ensureFileMetadataAttributesMatch(e, &expected); err != nil {
return fmt.Errorf("TOC and tar-split metadata doesnt match: %w", err)
}
return nil
}); err != nil {
return err
}
if len(pendingFiles) != 0 {
remaining := expMaps.Keys(pendingFiles)
if len(remaining) > 5 {
remaining = remaining[:5] // Just to limit the size of the output.
}
return fmt.Errorf("TOC contains entries not present in tar-split, incl. %q", remaining)
}
return nil
}
// ensureTimePointersMatch ensures that a and b are equal
func ensureTimePointersMatch(a, b *time.Time) error {
// We didn't always use “timeIfNotZero” when creating the TOC, so treat time.IsZero the same as nil.
// The archive/tar code turns time.IsZero() timestamps into a Unix timestamp of 0 when writing, but turns a Unix timestamp of 0
// when reading into a (local-timezone) Jan 1 1970, which is not IsZero(). So, treat that the same as IsZero as well.
unixZero := time.Unix(0, 0)
if a != nil && (a.IsZero() || a.Equal(unixZero)) {
a = nil
}
if b != nil && (b.IsZero() || b.Equal(unixZero)) {
b = nil
}
switch {
case a == nil && b == nil:
return nil
case a == nil:
return fmt.Errorf("nil != %v", *b)
case b == nil:
return fmt.Errorf("%v != nil", *a)
default:
if a.Equal(*b) {
return nil
}
return fmt.Errorf("%v != %v", *a, *b)
}
}
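Under this normalization a nil pointer, a zero time.Time, and a Unix-epoch-0 timestamp all compare as equal; for illustration (arbitrary example values, not from the patch):

zero := time.Time{}               // IsZero() == true
epoch := time.Unix(0, 0)          // what archive/tar yields when reading a zero mtime
later := time.Unix(1722945612, 0) // any non-zero timestamp

_ = ensureTimePointersMatch(nil, &zero)    // nil: both normalize to nil
_ = ensureTimePointersMatch(&epoch, nil)   // nil: epoch 0 is treated as zero
_ = ensureTimePointersMatch(&zero, &later) // error: "nil != ..."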
// ensureFileMetadataAttributesMatch ensures that a and b match in file attributes (it ignores entries relevant to locating data
// in the tar stream or matching contents)
func ensureFileMetadataAttributesMatch(a, b *internal.FileMetadata) error {
// Keep this in sync with internal.FileMetadata!
if a.Type != b.Type {
return fmt.Errorf("mismatch of Type: %q != %q", a.Type, b.Type)
}
if a.Name != b.Name {
return fmt.Errorf("mismatch of Name: %q != %q", a.Name, b.Name)
}
if a.Linkname != b.Linkname {
return fmt.Errorf("mismatch of Linkname: %q != %q", a.Linkname, b.Linkname)
}
if a.Mode != b.Mode {
return fmt.Errorf("mismatch of Mode: %q != %q", a.Mode, b.Mode)
}
if a.Size != b.Size {
return fmt.Errorf("mismatch of Size: %q != %q", a.Size, b.Size)
}
if a.UID != b.UID {
return fmt.Errorf("mismatch of UID: %q != %q", a.UID, b.UID)
}
if a.GID != b.GID {
return fmt.Errorf("mismatch of GID: %q != %q", a.GID, b.GID)
}
if err := ensureTimePointersMatch(a.ModTime, b.ModTime); err != nil {
return fmt.Errorf("mismatch of ModTime: %w", err)
}
if err := ensureTimePointersMatch(a.AccessTime, b.AccessTime); err != nil {
return fmt.Errorf("mismatch of AccessTime: %w", err)
}
if err := ensureTimePointersMatch(a.ChangeTime, b.ChangeTime); err != nil {
return fmt.Errorf("mismatch of ChangeTime: %w", err)
}
if a.Devmajor != b.Devmajor {
return fmt.Errorf("mismatch of Devmajor: %q != %q", a.Devmajor, b.Devmajor)
}
if a.Devminor != b.Devminor {
return fmt.Errorf("mismatch of Devminor: %q != %q", a.Devminor, b.Devminor)
}
if !maps.Equal(a.Xattrs, b.Xattrs) {
return fmt.Errorf("mismatch of Xattrs: %q != %q", a.Xattrs, b.Xattrs)
}
// Digest is not compared
// Offset is not compared
// EndOffset is not compared
// ChunkSize is not compared
// ChunkOffset is not compared
// ChunkDigest is not compared
// ChunkType is not compared
return nil
}
func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string) ([]byte, error) {
d, err := digest.Parse(expectedCompressedChecksum)
if err != nil {


@@ -7,7 +7,6 @@ package compressor
import (
"bufio"
"bytes"
"encoding/base64"
"io"
"github.com/containers/storage/pkg/chunked/internal"
@@ -369,34 +368,14 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
}
}
typ, err := internal.GetType(hdr.Typeflag)
mainEntry, err := internal.NewFileMetadata(hdr)
if err != nil {
return err
}
xattrs := make(map[string]string)
for k, v := range hdr.Xattrs {
xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v))
}
entries := []internal.FileMetadata{
{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: &hdr.ModTime,
AccessTime: &hdr.AccessTime,
ChangeTime: &hdr.ChangeTime,
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
Digest: checksum,
Offset: startOffset,
EndOffset: lastOffset,
},
}
mainEntry.Digest = checksum
mainEntry.Offset = startOffset
mainEntry.EndOffset = lastOffset
entries := []internal.FileMetadata{mainEntry}
for i := 1; i < len(chunks); i++ {
entries = append(entries, internal.FileMetadata{
Type: internal.TypeChunk,


@@ -1,13 +1,16 @@
//go:build unix
package dump
import (
"bufio"
"encoding/base64"
"fmt"
"io"
"path/filepath"
"reflect"
"strings"
"time"
"unicode"
"github.com/containers/storage/pkg/chunked/internal"
"golang.org/x/sys/unix"
@@ -20,20 +23,26 @@ const (
ESCAPE_LONE_DASH
)
func escaped(val string, escape int) string {
func escaped(val []byte, escape int) string {
noescapeSpace := escape&NOESCAPE_SPACE != 0
escapeEqual := escape&ESCAPE_EQUAL != 0
escapeLoneDash := escape&ESCAPE_LONE_DASH != 0
length := len(val)
if escapeLoneDash && val == "-" {
if escapeLoneDash && len(val) == 1 && val[0] == '-' {
return fmt.Sprintf("\\x%.2x", val[0])
}
// This is intended to match the C isprint API with LC_CTYPE=C
isprint := func(c byte) bool {
return c >= 32 && c < 127
}
// This is intended to match the C isgraph API with LC_CTYPE=C
isgraph := func(c byte) bool {
return c > 32 && c < 127
}
var result string
for i := 0; i < length; i++ {
c := val[i]
for _, c := range []byte(val) {
hexEscape := false
var special string
@@ -50,9 +59,9 @@ func escaped(val string, escape int) string {
hexEscape = escapeEqual
default:
if noescapeSpace {
hexEscape = !unicode.IsPrint(rune(c))
hexEscape = !isprint(c)
} else {
hexEscape = !unicode.IsPrint(rune(c)) || unicode.IsSpace(rune(c))
hexEscape = !isgraph(c)
}
}
@@ -67,8 +76,8 @@ func escaped(val string, escape int) string {
return result
}
func escapedOptional(val string, escape int) string {
if val == "" {
func escapedOptional(val []byte, escape int) string {
if len(val) == 0 {
return "-"
}
return escaped(val, escape)
@@ -104,10 +113,31 @@ func sanitizeName(name string) string {
return path
}
func dumpNode(out io.Writer, links map[string]int, verityDigests map[string]string, entry *internal.FileMetadata) error {
func dumpNode(out io.Writer, added map[string]*internal.FileMetadata, links map[string]int, verityDigests map[string]string, entry *internal.FileMetadata) error {
path := sanitizeName(entry.Name)
if _, err := fmt.Fprint(out, escaped(path, ESCAPE_STANDARD)); err != nil {
parent := filepath.Dir(path)
if _, found := added[parent]; !found && path != "/" {
parentEntry := &internal.FileMetadata{
Name: parent,
Type: internal.TypeDir,
Mode: 0o755,
}
if err := dumpNode(out, added, links, verityDigests, parentEntry); err != nil {
return err
}
}
if e, found := added[path]; found {
// if the entry was already added, make sure it has the same data
if !reflect.DeepEqual(*e, *entry) {
return fmt.Errorf("entry %q already added with different data", path)
}
return nil
}
added[path] = entry
if _, err := fmt.Fprint(out, escaped([]byte(path), ESCAPE_STANDARD)); err != nil {
return err
}
@@ -151,7 +181,7 @@ func dumpNode(out io.Writer, links map[string]int, verityDigests map[string]stri
}
}
if _, err := fmt.Fprintf(out, escapedOptional(payload, ESCAPE_LONE_DASH)); err != nil {
if _, err := fmt.Fprint(out, escapedOptional([]byte(payload), ESCAPE_LONE_DASH)); err != nil {
return err
}
@@ -165,14 +195,18 @@ func dumpNode(out io.Writer, links map[string]int, verityDigests map[string]stri
return err
}
digest := verityDigests[payload]
if _, err := fmt.Fprintf(out, escapedOptional(digest, ESCAPE_LONE_DASH)); err != nil {
if _, err := fmt.Fprint(out, escapedOptional([]byte(digest), ESCAPE_LONE_DASH)); err != nil {
return err
}
for k, v := range entry.Xattrs {
name := escaped(k, ESCAPE_EQUAL)
value := escaped(v, ESCAPE_EQUAL)
for k, vEncoded := range entry.Xattrs {
v, err := base64.StdEncoding.DecodeString(vEncoded)
if err != nil {
return fmt.Errorf("decode xattr %q: %w", k, err)
}
name := escaped([]byte(k), ESCAPE_EQUAL)
value := escaped(v, ESCAPE_EQUAL)
if _, err := fmt.Fprintf(out, " %s=%s", name, value); err != nil {
return err
}
@@ -201,6 +235,7 @@ func GenerateDump(tocI interface{}, verityDigests map[string]string) (io.Reader,
}()
links := make(map[string]int)
added := make(map[string]*internal.FileMetadata)
for _, e := range toc.Entries {
if e.Linkname == "" {
continue
@@ -211,14 +246,14 @@ func GenerateDump(tocI interface{}, verityDigests map[string]string) (io.Reader,
links[e.Linkname] = links[e.Linkname] + 1
}
if len(toc.Entries) == 0 || (sanitizeName(toc.Entries[0].Name) != "/") {
if len(toc.Entries) == 0 {
root := &internal.FileMetadata{
Name: "/",
Type: internal.TypeDir,
Mode: 0o755,
}
if err := dumpNode(w, links, verityDigests, root); err != nil {
if err := dumpNode(w, added, links, verityDigests, root); err != nil {
pipeW.CloseWithError(err)
closed = true
return
@@ -229,7 +264,7 @@ func GenerateDump(tocI interface{}, verityDigests map[string]string) (io.Reader,
if e.Type == internal.TypeChunk {
continue
}
if err := dumpNode(w, links, verityDigests, &e); err != nil {
if err := dumpNode(w, added, links, verityDigests, &e); err != nil {
pipeW.CloseWithError(err)
closed = true
return
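The added map gives dumpNode two new properties: missing parent directories are synthesized as 0o755 TypeDir entries before their children, and a path may appear twice only if both entries are identical. The recursion shape, reduced to a simplified sketch (hypothetical emit helper, not part of the package):

// emit writes path after all of its ancestors, creating each entry at most once.
func emit(added map[string]bool, path string, emitLine func(string)) {
	if parent := filepath.Dir(path); path != "/" && !added[parent] {
		emit(added, parent, emitLine) // ancestors are dumped first
	}
	if added[path] {
		return // duplicate: already dumped
	}
	added[path] = true
	emitLine(path)
}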


@@ -0,0 +1,605 @@
package chunked
import (
"encoding/base64"
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
"sync/atomic"
"syscall"
"time"
driversCopy "github.com/containers/storage/drivers/copy"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/internal"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/vbatts/tar-split/archive/tar"
"golang.org/x/sys/unix"
)
// procPathForFile returns an absolute path in /proc which
// refers to the file; see procPathForFd.
func procPathForFile(f *os.File) string {
return procPathForFd(int(f.Fd()))
}
// procPathForFd returns an absolute path in /proc which
// refers to the file; this allows passing a file descriptor
// in places that don't accept a file descriptor.
func procPathForFd(fd int) string {
return fmt.Sprintf("/proc/self/fd/%d", fd)
}
// fileMetadata is a wrapper around internal.FileMetadata with additional private fields that
// are not part of the TOC document.
// Type: TypeChunk entries are stored in Chunks, the primary [fileMetadata] entries never use TypeChunk.
type fileMetadata struct {
internal.FileMetadata
// chunks stores the TypeChunk entries relevant to this entry when FileMetadata.Type == TypeReg.
chunks []*internal.FileMetadata
// skipSetAttrs is set when the file attributes must not be
// modified, e.g. it is a hard link from a different source,
// or a composefs file.
skipSetAttrs bool
}
func doHardLink(dirfd, srcFd int, destFile string) error {
destDir, destBase := filepath.Split(destFile)
destDirFd := dirfd
if destDir != "" && destDir != "." {
f, err := openOrCreateDirUnderRoot(dirfd, destDir, 0)
if err != nil {
return err
}
defer f.Close()
destDirFd = int(f.Fd())
}
doLink := func() error {
// Using unix.AT_EMPTY_PATH requires CAP_DAC_READ_SEARCH while this variant that uses
// /proc/self/fd doesn't and can be used with rootless.
srcPath := procPathForFd(srcFd)
err := unix.Linkat(unix.AT_FDCWD, srcPath, destDirFd, destBase, unix.AT_SYMLINK_FOLLOW)
if err != nil {
return &fs.PathError{Op: "linkat", Path: destFile, Err: err}
}
return nil
}
err := doLink()
// if the destination exists, unlink it first and try again
if err != nil && os.IsExist(err) {
if err := unix.Unlinkat(destDirFd, destBase, 0); err != nil {
return err
}
return doLink()
}
return err
}
func copyFileContent(srcFd int, fileMetadata *fileMetadata, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) {
destFile := fileMetadata.Name
src := procPathForFd(srcFd)
st, err := os.Stat(src)
if err != nil {
return nil, -1, fmt.Errorf("copy file content for %q: %w", destFile, err)
}
copyWithFileRange, copyWithFileClone := true, true
if useHardLinks {
err := doHardLink(dirfd, srcFd, destFile)
if err == nil {
// if the file was deduplicated with a hard link, skip overriding file metadata.
fileMetadata.skipSetAttrs = true
return nil, st.Size(), nil
}
}
// If the destination file already exists, we shouldn't blow it away
dstFile, err := openFileUnderRoot(dirfd, destFile, newFileFlags, mode)
if err != nil {
return nil, -1, fmt.Errorf("open file %q under rootfs for copy: %w", destFile, err)
}
err = driversCopy.CopyRegularToFile(src, dstFile, st, &copyWithFileRange, &copyWithFileClone)
if err != nil {
dstFile.Close()
return nil, -1, fmt.Errorf("copy to file %q under rootfs: %w", destFile, err)
}
return dstFile, st.Size(), nil
}
func timeToTimespec(time *time.Time) (ts unix.Timespec) {
if time == nil || time.IsZero() {
// Return UTIME_OMIT special value
ts.Sec = 0
ts.Nsec = ((1 << 30) - 2)
return
}
return unix.NsecToTimespec(time.UnixNano())
}
// chown changes the owner and group of the file at the specified path under the directory
// pointed by dirfd.
// If nofollow is true, the function will not follow symlinks.
// If path is empty, the function will change the owner and group of the file descriptor.
// absolutePath is the absolute path of the file, used only for error messages.
func chown(dirfd int, path string, uid, gid int, nofollow bool, absolutePath string) error {
var err error
flags := 0
if nofollow {
flags |= unix.AT_SYMLINK_NOFOLLOW
} else if path == "" {
flags |= unix.AT_EMPTY_PATH
}
err = unix.Fchownat(dirfd, path, uid, gid, flags)
if err == nil {
return nil
}
if errors.Is(err, syscall.EINVAL) {
return fmt.Errorf(`potentially insufficient UIDs or GIDs available in the user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid if configured locally and run "podman system migrate": %w`, uid, gid, path, err)
}
return &fs.PathError{Op: "fchownat", Path: absolutePath, Err: err}
}
// setFileAttrs sets the file attributes for file given metadata
func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions, usePath bool) error {
if metadata.skipSetAttrs {
return nil
}
if file == nil {
return errors.New("invalid file")
}
fd := int(file.Fd())
t, err := typeToTarType(metadata.Type)
if err != nil {
return err
}
// If it is a symlink, force to use the path
if t == tar.TypeSymlink {
usePath = true
}
baseName := ""
if usePath {
dirName := filepath.Dir(metadata.Name)
if dirName != "" {
parentFd, err := openFileUnderRoot(dirfd, dirName, unix.O_PATH|unix.O_DIRECTORY, 0)
if err != nil {
return err
}
defer parentFd.Close()
dirfd = int(parentFd.Fd())
}
baseName = filepath.Base(metadata.Name)
}
doChown := func() error {
var err error
if usePath {
err = chown(dirfd, baseName, metadata.UID, metadata.GID, true, metadata.Name)
} else {
err = chown(fd, "", metadata.UID, metadata.GID, false, metadata.Name)
}
if options.IgnoreChownErrors {
return nil
}
return err
}
doSetXattr := func(k string, v []byte) error {
err := unix.Fsetxattr(fd, k, v, 0)
if err != nil {
return &fs.PathError{Op: "fsetxattr", Path: metadata.Name, Err: err}
}
return nil
}
doUtimes := func() error {
ts := []unix.Timespec{timeToTimespec(metadata.AccessTime), timeToTimespec(metadata.ModTime)}
var err error
if usePath {
err = unix.UtimesNanoAt(dirfd, baseName, ts, unix.AT_SYMLINK_NOFOLLOW)
} else {
err = unix.UtimesNanoAt(unix.AT_FDCWD, procPathForFd(fd), ts, 0)
}
if err != nil {
return &fs.PathError{Op: "utimensat", Path: metadata.Name, Err: err}
}
return nil
}
doChmod := func() error {
var err error
op := ""
if usePath {
err = unix.Fchmodat(dirfd, baseName, uint32(mode), unix.AT_SYMLINK_NOFOLLOW)
op = "fchmodat"
} else {
err = unix.Fchmod(fd, uint32(mode))
op = "fchmod"
}
if err != nil {
return &fs.PathError{Op: op, Path: metadata.Name, Err: err}
}
return nil
}
if err := doChown(); err != nil {
return err
}
canIgnore := func(err error) bool {
return err == nil || errors.Is(err, unix.ENOSYS) || errors.Is(err, unix.ENOTSUP)
}
for k, v := range metadata.Xattrs {
if _, found := xattrsToIgnore[k]; found {
continue
}
data, err := base64.StdEncoding.DecodeString(v)
if err != nil {
return fmt.Errorf("decode xattr %q: %w", v, err)
}
if err := doSetXattr(k, data); !canIgnore(err) {
return fmt.Errorf("set xattr %s=%q for %q: %w", k, data, metadata.Name, err)
}
}
if err := doUtimes(); !canIgnore(err) {
return err
}
if err := doChmod(); !canIgnore(err) {
return err
}
return nil
}
func openFileUnderRootFallback(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
root := procPathForFd(dirfd)
targetRoot, err := os.Readlink(root)
if err != nil {
return -1, err
}
hasNoFollow := (flags & unix.O_NOFOLLOW) != 0
var fd int
// If O_NOFOLLOW is specified in the flags, then resolve only the parent directory and use the
// last component as the path to openat().
if hasNoFollow {
dirName, baseName := filepath.Split(name)
if dirName != "" && dirName != "." {
newRoot, err := securejoin.SecureJoin(root, dirName)
if err != nil {
return -1, err
}
root = newRoot
}
parentDirfd, err := unix.Open(root, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return -1, &fs.PathError{Op: "open", Path: root, Err: err}
}
defer unix.Close(parentDirfd)
fd, err = unix.Openat(parentDirfd, baseName, int(flags), uint32(mode))
if err != nil {
return -1, &fs.PathError{Op: "openat", Path: name, Err: err}
}
} else {
newPath, err := securejoin.SecureJoin(root, name)
if err != nil {
return -1, err
}
fd, err = unix.Openat(dirfd, newPath, int(flags), uint32(mode))
if err != nil {
return -1, &fs.PathError{Op: "openat", Path: newPath, Err: err}
}
}
target, err := os.Readlink(procPathForFd(fd))
if err != nil {
unix.Close(fd)
return -1, err
}
// Add an additional check to make sure the opened fd is inside the rootfs
if !strings.HasPrefix(target, targetRoot) {
unix.Close(fd)
return -1, fmt.Errorf("while resolving %q. It resolves outside the root directory", name)
}
return fd, err
}
func openFileUnderRootOpenat2(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
how := unix.OpenHow{
Flags: flags,
Mode: uint64(mode & 0o7777),
Resolve: unix.RESOLVE_IN_ROOT,
}
fd, err := unix.Openat2(dirfd, name, &how)
if err != nil {
return -1, &fs.PathError{Op: "openat2", Path: name, Err: err}
}
return fd, nil
}
// skipOpenat2 is set when openat2 is not supported by the underlying kernel, to avoid
// trying it again.
var skipOpenat2 int32
// openFileUnderRootRaw tries to open a file using openat2 and, if it is not supported, falls back to a
// userspace lookup.
func openFileUnderRootRaw(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
var fd int
var err error
if name == "" {
fd, err := unix.Dup(dirfd)
if err != nil {
return -1, fmt.Errorf("failed to duplicate file descriptor %d: %w", dirfd, err)
}
return fd, nil
}
if atomic.LoadInt32(&skipOpenat2) > 0 {
fd, err = openFileUnderRootFallback(dirfd, name, flags, mode)
} else {
fd, err = openFileUnderRootOpenat2(dirfd, name, flags, mode)
// If the function failed with ENOSYS, switch off the support for openat2
// and fall back to using securejoin.
if err != nil && errors.Is(err, unix.ENOSYS) {
atomic.StoreInt32(&skipOpenat2, 1)
fd, err = openFileUnderRootFallback(dirfd, name, flags, mode)
}
}
return fd, err
}
// openFileUnderRoot safely opens a file under the specified root directory using openat2
// dirfd is an open file descriptor to the target checkout directory.
// name is the path to open relative to dirfd.
// flags are the flags to pass to the open syscall.
// mode specifies the mode to use for newly created files.
func openFileUnderRoot(dirfd int, name string, flags uint64, mode os.FileMode) (*os.File, error) {
fd, err := openFileUnderRootRaw(dirfd, name, flags, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
hasCreate := (flags & unix.O_CREAT) != 0
if errors.Is(err, unix.ENOENT) && hasCreate {
parent := filepath.Dir(name)
if parent != "" {
newDirfd, err2 := openOrCreateDirUnderRoot(dirfd, parent, 0)
if err2 == nil {
defer newDirfd.Close()
fd, err := openFileUnderRootRaw(int(newDirfd.Fd()), filepath.Base(name), flags, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
}
}
}
return nil, fmt.Errorf("open %q under the rootfs: %w", name, err)
}
// openOrCreateDirUnderRoot safely opens a directory, or creates it if it is missing.
// dirfd is an open file descriptor to the target checkout directory.
// name is the path to open relative to dirfd.
// mode specifies the mode to use for newly created files.
func openOrCreateDirUnderRoot(dirfd int, name string, mode os.FileMode) (*os.File, error) {
fd, err := openFileUnderRootRaw(dirfd, name, unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
if errors.Is(err, unix.ENOENT) {
parent := filepath.Dir(name)
if parent != "" {
pDir, err2 := openOrCreateDirUnderRoot(dirfd, parent, mode)
if err2 != nil {
return nil, err
}
defer pDir.Close()
baseName := filepath.Base(name)
if err2 := unix.Mkdirat(int(pDir.Fd()), baseName, uint32(mode)); err2 != nil {
return nil, &fs.PathError{Op: "mkdirat", Path: name, Err: err2}
}
fd, err = openFileUnderRootRaw(int(pDir.Fd()), baseName, unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
}
}
return nil, err
}
// appendHole creates a hole with the specified size at the open fd.
// fd is the open file descriptor.
// name is the path to use for error messages.
// size is the size of the hole to create.
func appendHole(fd int, name string, size int64) error {
off, err := unix.Seek(fd, size, unix.SEEK_CUR)
if err != nil {
return &fs.PathError{Op: "seek", Path: name, Err: err}
}
// Make sure the file size is changed. It might be the last hole and no other data written afterwards.
if err := unix.Ftruncate(fd, off); err != nil {
return &fs.PathError{Op: "ftruncate", Path: name, Err: err}
}
return nil
}
func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *fileMetadata, options *archive.TarOptions) error {
parent, base := filepath.Split(name)
parentFd := dirfd
if parent != "" && parent != "." {
parentFile, err := openOrCreateDirUnderRoot(dirfd, parent, 0)
if err != nil {
return err
}
defer parentFile.Close()
parentFd = int(parentFile.Fd())
}
if err := unix.Mkdirat(parentFd, base, uint32(mode)); err != nil {
if !os.IsExist(err) {
return &fs.PathError{Op: "mkdirat", Path: name, Err: err}
}
}
file, err := openFileUnderRoot(parentFd, base, unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return err
}
defer file.Close()
return setFileAttrs(dirfd, file, mode, metadata, options, false)
}
func safeLink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
sourceFile, err := openFileUnderRoot(dirfd, metadata.Linkname, unix.O_PATH|unix.O_RDONLY|unix.O_NOFOLLOW, 0)
if err != nil {
return err
}
defer sourceFile.Close()
err = doHardLink(dirfd, int(sourceFile.Fd()), metadata.Name)
if err != nil {
return err
}
newFile, err := openFileUnderRoot(dirfd, metadata.Name, unix.O_WRONLY|unix.O_NOFOLLOW, 0)
if err != nil {
// If the target is a symlink, open the file with O_PATH.
if errors.Is(err, unix.ELOOP) {
newFile, err := openFileUnderRoot(dirfd, metadata.Name, unix.O_PATH|unix.O_NOFOLLOW, 0)
if err != nil {
return err
}
defer newFile.Close()
return setFileAttrs(dirfd, newFile, mode, metadata, options, true)
}
return err
}
defer newFile.Close()
return setFileAttrs(dirfd, newFile, mode, metadata, options, false)
}
func safeSymlink(dirfd int, metadata *fileMetadata) error {
destDir, destBase := filepath.Split(metadata.Name)
destDirFd := dirfd
if destDir != "" && destDir != "." {
f, err := openOrCreateDirUnderRoot(dirfd, destDir, 0)
if err != nil {
return err
}
defer f.Close()
destDirFd = int(f.Fd())
}
if err := unix.Symlinkat(metadata.Linkname, destDirFd, destBase); err != nil {
return &fs.PathError{Op: "symlinkat", Path: metadata.Name, Err: err}
}
return nil
}
type whiteoutHandler struct {
Dirfd int
Root string
}
func (d whiteoutHandler) Setxattr(path, name string, value []byte) error {
file, err := openOrCreateDirUnderRoot(d.Dirfd, path, 0)
if err != nil {
return err
}
defer file.Close()
if err := unix.Fsetxattr(int(file.Fd()), name, value, 0); err != nil {
return &fs.PathError{Op: "fsetxattr", Path: path, Err: err}
}
return nil
}
func (d whiteoutHandler) Mknod(path string, mode uint32, dev int) error {
dir, base := filepath.Split(path)
dirfd := d.Dirfd
if dir != "" && dir != "." {
dir, err := openOrCreateDirUnderRoot(d.Dirfd, dir, 0)
if err != nil {
return err
}
defer dir.Close()
dirfd = int(dir.Fd())
}
if err := unix.Mknodat(dirfd, base, mode, dev); err != nil {
return &fs.PathError{Op: "mknodat", Path: path, Err: err}
}
return nil
}
func (d whiteoutHandler) Chown(path string, uid, gid int) error {
file, err := openFileUnderRoot(d.Dirfd, path, unix.O_PATH, 0)
if err != nil {
return err
}
defer file.Close()
return chown(int(file.Fd()), "", uid, gid, false, path)
}
type readerAtCloser interface {
io.ReaderAt
io.Closer
}
// seekableFile is a struct that wraps an *os.File to provide an ImageSourceSeekable.
type seekableFile struct {
reader readerAtCloser
}
func (f *seekableFile) Close() error {
return f.reader.Close()
}
func (f *seekableFile) GetBlobAt(chunks []ImageSourceChunk) (chan io.ReadCloser, chan error, error) {
streams := make(chan io.ReadCloser)
errs := make(chan error)
go func() {
for _, chunk := range chunks {
streams <- io.NopCloser(io.NewSectionReader(f.reader, int64(chunk.Offset), int64(chunk.Length)))
}
close(streams)
close(errs)
}()
return streams, errs, nil
}
func newSeekableFile(reader readerAtCloser) *seekableFile {
return &seekableFile{reader: reader}
}
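A sketch of driving GetBlobAt from the consumer side (hypothetical caller: f is a *seekableFile, the chunk offsets are arbitrary, and real callers typically also select on the error channel while reading):

chunks := []ImageSourceChunk{
	{Offset: 0, Length: 512},
	{Offset: 4096, Length: 1024},
}
streams, errs, err := f.GetBlobAt(chunks)
if err != nil {
	return err
}
for stream := range streams {
	_, err := io.Copy(io.Discard, stream) // consume each requested range in order
	stream.Close()
	if err != nil {
		return err
	}
}
if err := <-errs; err != nil { // errs is closed once all chunks were delivered
	return err
}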


@@ -5,25 +5,57 @@ package internal
// larger software like the graph drivers.
import (
"archive/tar"
"bytes"
"encoding/base64"
"encoding/binary"
"fmt"
"io"
"strings"
"time"
"github.com/containers/storage/pkg/archive"
jsoniter "github.com/json-iterator/go"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest"
"github.com/vbatts/tar-split/archive/tar"
)
// TOC is short for Table of Contents and is used by the zstd:chunked
// file format to effectively add an overall index into the contents
// of a tarball; it also includes file metadata.
type TOC struct {
Version int `json:"version"`
Entries []FileMetadata `json:"entries"`
TarSplitDigest digest.Digest `json:"tarSplitDigest,omitempty"`
// Version is currently expected to be 1
Version int `json:"version"`
// Entries is the list of file metadata in this TOC.
// The ordering in this array currently defaults to being the same
// as that of the tar stream; however, this should not be relied on.
Entries []FileMetadata `json:"entries"`
// TarSplitDigest is the checksum of the "tar-split" data which
// is included as a distinct skippable zstd frame before the TOC.
TarSplitDigest digest.Digest `json:"tarSplitDigest,omitempty"`
}
// FileMetadata is an entry in the TOC that includes both generic file metadata
// that duplicates what can be found in the tar header (and should match), but
// also special/custom content (see below).
//
// Regular files may optionally be represented as a sequence of “chunks”,
// which may be ChunkTypeData or ChunkTypeZeros (and ChunkTypeData boundaries
// are heuristically determined to increase the chance of chunk matching / reuse
// similar to rsync). In that case, the regular file is represented
// as an initial TypeReg entry (with all metadata for the file as a whole)
// immediately followed by zero or more TypeChunk entries (containing only Type,
// Name and Chunk* fields); if there is at least one TypeChunk entry, the Chunk*
// fields are relevant in all of these entries, including the initial
// TypeReg one.
//
// Note that the metadata here, when fetched by a zstd:chunked aware client,
// is used instead of that in the tar stream. The contents of the tar stream
// are not used in this scenario.
type FileMetadata struct {
// If you add any fields, update ensureFileMetadataAttributesMatch as well!
// The metadata below largely duplicates that in the tar headers.
Type string `json:"type"`
Name string `json:"name"`
Linkname string `json:"linkName,omitempty"`
@@ -37,9 +69,11 @@ type FileMetadata struct {
Devmajor int64 `json:"devMajor,omitempty"`
Devminor int64 `json:"devMinor,omitempty"`
Xattrs map[string]string `json:"xattrs,omitempty"`
Digest string `json:"digest,omitempty"`
Offset int64 `json:"offset,omitempty"`
EndOffset int64 `json:"endOffset,omitempty"`
// Digest is a hexadecimal sha256 checksum of the file contents; it
// is empty for empty files
Digest string `json:"digest,omitempty"`
Offset int64 `json:"offset,omitempty"`
EndOffset int64 `json:"endOffset,omitempty"`
ChunkSize int64 `json:"chunkSize,omitempty"`
ChunkOffset int64 `json:"chunkOffset,omitempty"`
@@ -53,19 +87,23 @@ const (
)
const (
// The following types correspond to regular types of entries that can
// appear in a tar archive.
TypeReg = "reg"
TypeChunk = "chunk"
TypeLink = "hardlink"
TypeChar = "char"
TypeBlock = "block"
TypeDir = "dir"
TypeFifo = "fifo"
TypeSymlink = "symlink"
// TypeChunk is special; in zstd:chunked not only are files individually
// compressed and indexable, there is a "rolling checksum" used to compute
// "chunks" of individual file contents, that are also added to the TOC
TypeChunk = "chunk"
)
var TarTypes = map[byte]string{
tar.TypeReg: TypeReg,
tar.TypeRegA: TypeReg,
tar.TypeLink: TypeLink,
tar.TypeChar: TypeChar,
tar.TypeBlock: TypeBlock,
@@ -83,11 +121,23 @@ func GetType(t byte) (string, error) {
}
const (
// ManifestChecksumKey is a hexadecimal sha256 digest of the compressed manifest (TOC).
ManifestChecksumKey = "io.github.containers.zstd-chunked.manifest-checksum"
ManifestInfoKey = "io.github.containers.zstd-chunked.manifest-position"
TarSplitInfoKey = "io.github.containers.zstd-chunked.tarsplit-position"
// ManifestInfoKey is an annotation that signals the start of the TOC (manifest)
// contents which are embedded as a skippable zstd frame. It has a format of
// four decimal integers separated by `:` as follows:
// <offset>:<length>:<uncompressed length>:<type>
// The <type> is ManifestTypeCRFS which should have the value `1`.
ManifestInfoKey = "io.github.containers.zstd-chunked.manifest-position"
// TarSplitInfoKey is an annotation that signals the start of the "tar-split" metadata
// contents which are embedded as a skippable zstd frame. It has a format of
// three decimal integers separated by `:` as follows:
// <offset>:<length>:<uncompressed length>
TarSplitInfoKey = "io.github.containers.zstd-chunked.tarsplit-position"
TarSplitChecksumKey = "io.github.containers.zstd-chunked.tarsplit-checksum" // Deprecated: Use the TOC.TarSplitDigest field instead, this annotation is no longer read nor written.
// TarSplitChecksumKey is no longer used and is replaced by the TOC.TarSplitDigest field instead.
// The value is retained here as a constant as a historical reference for older zstd:chunked images.
// TarSplitChecksumKey = "io.github.containers.zstd-chunked.tarsplit-checksum"
// ManifestTypeCRFS is a manifest file compatible with the CRFS TOC file.
ManifestTypeCRFS = 1
@@ -232,3 +282,43 @@ func footerDataToBlob(footer ZstdChunkedFooterData) []byte {
return manifestDataLE
}
// timeIfNotZero returns a pointer to the time.Time if it is not zero, otherwise it returns nil.
func timeIfNotZero(t *time.Time) *time.Time {
if t == nil || t.IsZero() {
return nil
}
return t
}
// NewFileMetadata creates a basic FileMetadata entry for hdr.
// The caller must set Digest, Offset, EndOffset, and the Chunk* values, separately.
func NewFileMetadata(hdr *tar.Header) (FileMetadata, error) {
typ, err := GetType(hdr.Typeflag)
if err != nil {
return FileMetadata{}, err
}
xattrs := make(map[string]string)
for k, v := range hdr.PAXRecords {
xattrKey, ok := strings.CutPrefix(k, archive.PaxSchilyXattr)
if !ok {
continue
}
xattrs[xattrKey] = base64.StdEncoding.EncodeToString([]byte(v))
}
return FileMetadata{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: timeIfNotZero(&hdr.ModTime),
AccessTime: timeIfNotZero(&hdr.AccessTime),
ChangeTime: timeIfNotZero(&hdr.ChangeTime),
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
}, nil
}
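The position annotations documented above are plain colon-separated decimal integers. A parsing sketch for the four-field ManifestInfoKey form (hypothetical parseManifestPosition helper, not part of the package; the three-field TarSplitInfoKey form parses the same way minus the trailing type):

// parseManifestPosition splits a ManifestInfoKey value of the form
// <offset>:<length>:<uncompressed length>:<type> into its numeric fields.
func parseManifestPosition(v string) (offset, length, uncompressed, manifestType uint64, _ error) {
	parts := strings.Split(v, ":")
	if len(parts) != 4 {
		return 0, 0, 0, 0, fmt.Errorf("invalid manifest position %q", v)
	}
	nums := make([]uint64, len(parts))
	for i, p := range parts {
		n, err := strconv.ParseUint(p, 10, 64)
		if err != nil {
			return 0, 0, 0, 0, fmt.Errorf("invalid manifest position %q: %w", v, err)
		}
		nums[i] = n
	}
	return nums[0], nums[1], nums[2], nums[3], nil
}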


@@ -8,20 +8,18 @@ import (
"fmt"
"hash"
"io"
"io/fs"
"os"
"path/filepath"
"reflect"
"sort"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/containerd/stargz-snapshotter/estargz"
storage "github.com/containers/storage"
graphdriver "github.com/containers/storage/drivers"
driversCopy "github.com/containers/storage/drivers/copy"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/compressor"
"github.com/containers/storage/pkg/chunked/internal"
@@ -29,8 +27,6 @@ import (
"github.com/containers/storage/pkg/fsverity"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/system"
"github.com/containers/storage/types"
securejoin "github.com/cyphar/filepath-securejoin"
jsoniter "github.com/json-iterator/go"
"github.com/klauspost/compress/zstd"
"github.com/klauspost/pgzip"
@@ -42,9 +38,8 @@ const (
const (
maxNumberMissingChunks = 1024
autoMergePartsThreshold = 128 // if the gap between two ranges is below this threshold, automatically merge them.
autoMergePartsThreshold = 1024 // if the gap between two ranges is below this threshold, automatically merge them.
newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_EXCL | unix.O_WRONLY)
containersOverrideXattr = "user.containers.override_stat"
bigDataKey = "zstd-chunked-manifest"
chunkedData = "zstd-chunked-data"
chunkedLayerDataKey = "zstd-chunked-layer-data"
@@ -59,21 +54,6 @@ const (
copyGoRoutines = 32
)
// fileMetadata is a wrapper around internal.FileMetadata with additional private fields that
// are not part of the TOC document.
// Type: TypeChunk entries are stored in Chunks, the primary [fileMetadata] entries never use TypeChunk.
type fileMetadata struct {
internal.FileMetadata
// chunks stores the TypeChunk entries relevant to this entry when FileMetadata.Type == TypeReg.
chunks []*internal.FileMetadata
// skipSetAttrs is set when the file attributes must not be
// modified, e.g. it is a hard link from a different source,
// or a composefs file.
skipSetAttrs bool
}
type compressedFileType int
type chunkedDiffer struct {
@@ -111,7 +91,7 @@ type chunkedDiffer struct {
blobSize int64
storeOpts *types.StoreOptions
pullOptions map[string]string
useFsVerity graphdriver.DifferFsVerity
fsVerityDigests map[string]string
@@ -127,98 +107,7 @@ type chunkedLayerData struct {
Format graphdriver.DifferOutputFormat `json:"format"`
}
func timeToTimespec(time *time.Time) (ts unix.Timespec) {
if time == nil || time.IsZero() {
// Return UTIME_OMIT special value
ts.Sec = 0
ts.Nsec = ((1 << 30) - 2)
return
}
return unix.NsecToTimespec(time.UnixNano())
}
func doHardLink(srcFd int, destDirFd int, destBase string) error {
doLink := func() error {
// Using unix.AT_EMPTY_PATH requires CAP_DAC_READ_SEARCH while this variant that uses
// /proc/self/fd doesn't and can be used with rootless.
srcPath := fmt.Sprintf("/proc/self/fd/%d", srcFd)
return unix.Linkat(unix.AT_FDCWD, srcPath, destDirFd, destBase, unix.AT_SYMLINK_FOLLOW)
}
err := doLink()
// if the destination exists, unlink it first and try again
if err != nil && os.IsExist(err) {
unix.Unlinkat(destDirFd, destBase, 0)
return doLink()
}
return err
}
func copyFileContent(srcFd int, fileMetadata *fileMetadata, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) {
destFile := fileMetadata.Name
src := fmt.Sprintf("/proc/self/fd/%d", srcFd)
st, err := os.Stat(src)
if err != nil {
return nil, -1, fmt.Errorf("copy file content for %q: %w", destFile, err)
}
copyWithFileRange, copyWithFileClone := true, true
if useHardLinks {
destDirPath := filepath.Dir(destFile)
destBase := filepath.Base(destFile)
destDir, err := openFileUnderRoot(destDirPath, dirfd, 0, mode)
if err == nil {
defer destDir.Close()
err := doHardLink(srcFd, int(destDir.Fd()), destBase)
if err == nil {
// if the file was deduplicated with a hard link, skip overriding file metadata.
fileMetadata.skipSetAttrs = true
return nil, st.Size(), nil
}
}
}
// If the destination file already exists, we shouldn't blow it away
dstFile, err := openFileUnderRoot(destFile, dirfd, newFileFlags, mode)
if err != nil {
return nil, -1, fmt.Errorf("open file %q under rootfs for copy: %w", destFile, err)
}
err = driversCopy.CopyRegularToFile(src, dstFile, st, &copyWithFileRange, &copyWithFileClone)
if err != nil {
dstFile.Close()
return nil, -1, fmt.Errorf("copy to file %q under rootfs: %w", destFile, err)
}
return dstFile, st.Size(), nil
}
type seekableFile struct {
file *os.File
}
func (f *seekableFile) Close() error {
return f.file.Close()
}
func (f *seekableFile) GetBlobAt(chunks []ImageSourceChunk) (chan io.ReadCloser, chan error, error) {
streams := make(chan io.ReadCloser)
errs := make(chan error)
go func() {
for _, chunk := range chunks {
streams <- io.NopCloser(io.NewSectionReader(f.file, int64(chunk.Offset), int64(chunk.Length)))
}
close(streams)
close(errs)
}()
return streams, errs, nil
}
func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *seekableFile, digest.Digest, map[string]string, error) {
func (c *chunkedDiffer) convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *seekableFile, digest.Digest, map[string]string, error) {
diff, err := archive.DecompressStream(payload)
if err != nil {
return 0, nil, "", nil, err
@@ -226,7 +115,7 @@ func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *se
fd, err := unix.Open(destDirectory, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
if err != nil {
return 0, nil, "", nil, err
return 0, nil, "", nil, &fs.PathError{Op: "open", Path: destDirectory, Err: err}
}
f := os.NewFile(uintptr(fd), destDirectory)
@ -240,7 +129,7 @@ func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *se
}
convertedOutputDigester := digest.Canonical.Digester()
copied, err := io.Copy(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff)
copied, err := io.CopyBuffer(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff, c.copyBuffer)
if err != nil {
f.Close()
return 0, nil, "", nil, err
@ -249,21 +138,15 @@ func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *se
f.Close()
return 0, nil, "", nil, err
}
is := seekableFile{
file: f,
}
return copied, &is, convertedOutputDigester.Digest(), newAnnotations, nil
return copied, newSeekableFile(f), convertedOutputDigester.Digest(), newAnnotations, nil
}
// GetDiffer returns a differ that can be used with ApplyDiffWithDiffer.
func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
storeOpts, err := types.DefaultStoreOptions()
if err != nil {
return nil, err
}
pullOptions := store.PullOptions()
if !parseBooleanPullOption(&storeOpts, "enable_partial_images", true) {
if !parseBooleanPullOption(pullOptions, "enable_partial_images", true) {
return nil, errors.New("enable_partial_images not configured")
}
@ -279,21 +162,21 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges
if err != nil {
return nil, fmt.Errorf("parsing zstd:chunked TOC digest %q: %w", zstdChunkedTOCDigestString, err)
}
return makeZstdChunkedDiffer(ctx, store, blobSize, zstdChunkedTOCDigest, annotations, iss, &storeOpts)
return makeZstdChunkedDiffer(store, blobSize, zstdChunkedTOCDigest, annotations, iss, pullOptions)
}
if hasEstargzTOC {
estargzTOCDigest, err := digest.Parse(estargzTOCDigestString)
if err != nil {
return nil, fmt.Errorf("parsing estargz TOC digest %q: %w", estargzTOCDigestString, err)
}
return makeEstargzChunkedDiffer(ctx, store, blobSize, estargzTOCDigest, iss, &storeOpts)
return makeEstargzChunkedDiffer(store, blobSize, estargzTOCDigest, iss, pullOptions)
}
return makeConvertFromRawDiffer(ctx, store, blobDigest, blobSize, annotations, iss, &storeOpts)
return makeConvertFromRawDiffer(store, blobDigest, blobSize, iss, pullOptions)
}
func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
if !parseBooleanPullOption(storeOpts, "convert_images", false) {
func makeConvertFromRawDiffer(store storage.Store, blobDigest digest.Digest, blobSize int64, iss ImageSourceSeekable, pullOptions map[string]string) (*chunkedDiffer, error) {
if !parseBooleanPullOption(pullOptions, "convert_images", false) {
return nil, errors.New("convert_images not configured")
}
@ -309,12 +192,12 @@ func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDige
convertToZstdChunked: true,
copyBuffer: makeCopyBuffer(),
layersCache: layersCache,
storeOpts: storeOpts,
pullOptions: pullOptions,
stream: iss,
}, nil
}
func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, pullOptions map[string]string) (*chunkedDiffer, error) {
manifest, toc, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, tocDigest, annotations)
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
@ -333,14 +216,14 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
layersCache: layersCache,
manifest: manifest,
toc: toc,
storeOpts: storeOpts,
pullOptions: pullOptions,
stream: iss,
tarSplit: tarSplit,
tocOffset: tocOffset,
}, nil
}
func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
func makeEstargzChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest.Digest, iss ImageSourceSeekable, pullOptions map[string]string) (*chunkedDiffer, error) {
manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, tocDigest)
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
@ -358,7 +241,7 @@ func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize
fileType: fileTypeEstargz,
layersCache: layersCache,
manifest: manifest,
storeOpts: storeOpts,
pullOptions: pullOptions,
stream: iss,
tocOffset: tocOffset,
}, nil
@ -375,15 +258,15 @@ func makeCopyBuffer() []byte {
// dirfd is an open file descriptor to the destination root directory.
// useHardLinks defines whether the deduplication can be performed using hard links.
func copyFileFromOtherLayer(file *fileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
srcDirfd, err := unix.Open(source, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return false, nil, 0, fmt.Errorf("open source file: %w", err)
return false, nil, 0, &fs.PathError{Op: "open", Path: source, Err: err}
}
defer unix.Close(srcDirfd)
srcFile, err := openFileUnderRoot(name, srcDirfd, unix.O_RDONLY, 0)
srcFile, err := openFileUnderRoot(srcDirfd, name, unix.O_RDONLY|syscall.O_CLOEXEC, 0)
if err != nil {
return false, nil, 0, fmt.Errorf("open source file under target rootfs (%s): %w", name, err)
return false, nil, 0, err
}
defer srcFile.Close()
@ -420,7 +303,7 @@ func canDedupFileWithHardLink(file *fileMetadata, fd int, s os.FileInfo) bool {
return false
}
path := fmt.Sprintf("/proc/self/fd/%d", fd)
path := procPathForFd(fd)
listXattrs, err := system.Llistxattr(path)
if err != nil {
@ -476,7 +359,7 @@ func findFileInOSTreeRepos(file *fileMetadata, ostreeRepos []string, dirfd int,
if st.Size() != file.Size {
continue
}
fd, err := unix.Open(sourceFile, unix.O_RDONLY|unix.O_NONBLOCK, 0)
fd, err := unix.Open(sourceFile, unix.O_RDONLY|unix.O_NONBLOCK|unix.O_CLOEXEC, 0)
if err != nil {
logrus.Debugf("could not open sourceFile %s: %v", sourceFile, err)
return false, nil, 0, nil
@ -585,15 +468,15 @@ type missingPart struct {
}
func (o *originFile) OpenFile() (io.ReadCloser, error) {
srcDirfd, err := unix.Open(o.Root, unix.O_RDONLY, 0)
srcDirfd, err := unix.Open(o.Root, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open source file: %w", err)
return nil, &fs.PathError{Op: "open", Path: o.Root, Err: err}
}
defer unix.Close(srcDirfd)
srcFile, err := openFileUnderRoot(o.Path, srcDirfd, unix.O_RDONLY, 0)
srcFile, err := openFileUnderRoot(srcDirfd, o.Path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open source file under target rootfs: %w", err)
return nil, err
}
if _, err := srcFile.Seek(o.Offset, 0); err != nil {
@ -603,253 +486,6 @@ func (o *originFile) OpenFile() (io.ReadCloser, error) {
return srcFile, nil
}
// setFileAttrs sets the file attributes for the file, using the given metadata
func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions, usePath bool) error {
if metadata.skipSetAttrs {
return nil
}
if file == nil || file.Fd() < 0 {
return errors.New("invalid file")
}
fd := int(file.Fd())
t, err := typeToTarType(metadata.Type)
if err != nil {
return err
}
// If it is a symlink, force the use of the path
if t == tar.TypeSymlink {
usePath = true
}
baseName := ""
if usePath {
dirName := filepath.Dir(metadata.Name)
if dirName != "" {
parentFd, err := openFileUnderRoot(dirName, dirfd, unix.O_PATH|unix.O_DIRECTORY, 0)
if err != nil {
return err
}
defer parentFd.Close()
dirfd = int(parentFd.Fd())
}
baseName = filepath.Base(metadata.Name)
}
doChown := func() error {
if usePath {
return unix.Fchownat(dirfd, baseName, metadata.UID, metadata.GID, unix.AT_SYMLINK_NOFOLLOW)
}
return unix.Fchown(fd, metadata.UID, metadata.GID)
}
doSetXattr := func(k string, v []byte) error {
return unix.Fsetxattr(fd, k, v, 0)
}
doUtimes := func() error {
ts := []unix.Timespec{timeToTimespec(metadata.AccessTime), timeToTimespec(metadata.ModTime)}
if usePath {
return unix.UtimesNanoAt(dirfd, baseName, ts, unix.AT_SYMLINK_NOFOLLOW)
}
return unix.UtimesNanoAt(unix.AT_FDCWD, fmt.Sprintf("/proc/self/fd/%d", fd), ts, 0)
}
doChmod := func() error {
if usePath {
return unix.Fchmodat(dirfd, baseName, uint32(mode), unix.AT_SYMLINK_NOFOLLOW)
}
return unix.Fchmod(fd, uint32(mode))
}
if err := doChown(); err != nil {
if !options.IgnoreChownErrors {
return fmt.Errorf("chown %q to %d:%d: %w", metadata.Name, metadata.UID, metadata.GID, err)
}
}
canIgnore := func(err error) bool {
return err == nil || errors.Is(err, unix.ENOSYS) || errors.Is(err, unix.ENOTSUP)
}
for k, v := range metadata.Xattrs {
if _, found := xattrsToIgnore[k]; found {
continue
}
data, err := base64.StdEncoding.DecodeString(v)
if err != nil {
return fmt.Errorf("decode xattr %q: %w", v, err)
}
if err := doSetXattr(k, data); !canIgnore(err) {
return fmt.Errorf("set xattr %s=%q for %q: %w", k, data, metadata.Name, err)
}
}
if err := doUtimes(); !canIgnore(err) {
return fmt.Errorf("set utimes for %q: %w", metadata.Name, err)
}
if err := doChmod(); !canIgnore(err) {
return fmt.Errorf("chmod %q: %w", metadata.Name, err)
}
return nil
}
func openFileUnderRootFallback(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
root := fmt.Sprintf("/proc/self/fd/%d", dirfd)
targetRoot, err := os.Readlink(root)
if err != nil {
return -1, err
}
hasNoFollow := (flags & unix.O_NOFOLLOW) != 0
var fd int
// If O_NOFOLLOW is specified in the flags, then resolve only the parent directory and use the
// last component as the path to openat().
if hasNoFollow {
dirName := filepath.Dir(name)
if dirName != "" {
newRoot, err := securejoin.SecureJoin(root, filepath.Dir(name))
if err != nil {
return -1, err
}
root = newRoot
}
parentDirfd, err := unix.Open(root, unix.O_PATH, 0)
if err != nil {
return -1, err
}
defer unix.Close(parentDirfd)
fd, err = unix.Openat(parentDirfd, filepath.Base(name), int(flags), uint32(mode))
if err != nil {
return -1, err
}
} else {
newPath, err := securejoin.SecureJoin(root, name)
if err != nil {
return -1, err
}
fd, err = unix.Openat(dirfd, newPath, int(flags), uint32(mode))
if err != nil {
return -1, err
}
}
target, err := os.Readlink(fmt.Sprintf("/proc/self/fd/%d", fd))
if err != nil {
unix.Close(fd)
return -1, err
}
// Add an additional check to make sure the opened fd is inside the rootfs
if !strings.HasPrefix(target, targetRoot) {
unix.Close(fd)
return -1, fmt.Errorf("while resolving %q. It resolves outside the root directory", name)
}
return fd, err
}
func openFileUnderRootOpenat2(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
how := unix.OpenHow{
Flags: flags,
Mode: uint64(mode & 0o7777),
Resolve: unix.RESOLVE_IN_ROOT,
}
return unix.Openat2(dirfd, name, &how)
}
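// Hedged illustration of RESOLVE_IN_ROOT: path resolution stays confined to dirfd,
// so a layer entry such as "etc/mtab -> /proc/mounts" (an invented example) resolves
// inside the checkout rather than on the host, and neither absolute symlinks nor
// ".." components can escape the root.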
// skipOpenat2 is set when openat2 is not supported by the underlying kernel, to avoid
// trying it again.
var skipOpenat2 int32
// openFileUnderRootRaw tries to open a file using openat2 and, if that is not supported,
// falls back to a userspace lookup.
func openFileUnderRootRaw(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
var fd int
var err error
if atomic.LoadInt32(&skipOpenat2) > 0 {
fd, err = openFileUnderRootFallback(dirfd, name, flags, mode)
} else {
fd, err = openFileUnderRootOpenat2(dirfd, name, flags, mode)
// If the function failed with ENOSYS, switch off the support for openat2
// and fall back to using securejoin.
if err != nil && errors.Is(err, unix.ENOSYS) {
atomic.StoreInt32(&skipOpenat2, 1)
fd, err = openFileUnderRootFallback(dirfd, name, flags, mode)
}
}
return fd, err
}
// openFileUnderRoot safely opens a file under the specified root directory using openat2
// name is the path to open relative to dirfd.
// dirfd is an open file descriptor to the target checkout directory.
// flags are the flags to pass to the open syscall.
// mode specifies the mode to use for newly created files.
func openFileUnderRoot(name string, dirfd int, flags uint64, mode os.FileMode) (*os.File, error) {
fd, err := openFileUnderRootRaw(dirfd, name, flags, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
hasCreate := (flags & unix.O_CREAT) != 0
if errors.Is(err, unix.ENOENT) && hasCreate {
parent := filepath.Dir(name)
if parent != "" {
newDirfd, err2 := openOrCreateDirUnderRoot(parent, dirfd, 0)
if err2 == nil {
defer newDirfd.Close()
fd, err := openFileUnderRootRaw(int(newDirfd.Fd()), filepath.Base(name), flags, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
}
}
}
return nil, fmt.Errorf("open %q under the rootfs: %w", name, err)
}
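// Hedged usage sketch (statUnderRoot, rootDir, and name are illustrative): open the
// destination root once, then resolve every entry relative to that fd so a hostile
// layer cannot redirect the open outside the checkout.
func statUnderRoot(rootDir, name string) (os.FileInfo, error) {
dirfd, err := unix.Open(rootDir, unix.O_RDONLY|unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
defer unix.Close(dirfd)
f, err := openFileUnderRoot(name, dirfd, unix.O_RDONLY, 0)
if err != nil {
return nil, err
}
defer f.Close()
return f.Stat()
}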
// openOrCreateDirUnderRoot safely opens a directory, or creates it if it is missing.
// name is the path to open relative to dirfd.
// dirfd is an open file descriptor to the target checkout directory.
// mode specifies the mode to use for newly created files.
func openOrCreateDirUnderRoot(name string, dirfd int, mode os.FileMode) (*os.File, error) {
fd, err := openFileUnderRootRaw(dirfd, name, unix.O_DIRECTORY|unix.O_RDONLY, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
if errors.Is(err, unix.ENOENT) {
parent := filepath.Dir(name)
if parent != "" {
pDir, err2 := openOrCreateDirUnderRoot(parent, dirfd, mode)
if err2 != nil {
return nil, err
}
defer pDir.Close()
baseName := filepath.Base(name)
if err2 := unix.Mkdirat(int(pDir.Fd()), baseName, 0o755); err2 != nil {
return nil, err
}
fd, err = openFileUnderRootRaw(int(pDir.Fd()), baseName, unix.O_DIRECTORY|unix.O_RDONLY, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
}
}
return nil, err
}
func (c *chunkedDiffer) prepareCompressedStreamToFile(partCompression compressedFileType, from io.Reader, mf *missingFileChunk) (compressedFileType, error) {
switch {
case partCompression == fileTypeHole:
@ -918,23 +554,14 @@ func hashHole(h hash.Hash, size int64, copyBuffer []byte) error {
return nil
}
// appendHole creates a hole with the specified size at the open fd.
func appendHole(fd int, size int64) error {
off, err := unix.Seek(fd, size, unix.SEEK_CUR)
if err != nil {
return err
}
// Make sure the file size is changed. It might be the last hole and no other data written afterwards.
if err := unix.Ftruncate(fd, off); err != nil {
return err
}
return nil
}
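// Hedged illustration (writeSparse is not part of this file): appendHole only moves
// the write offset and extends the file size, so the skipped region reads back as
// zeros and, on filesystems with sparse-file support, allocates no blocks.
func writeSparse(fd int) error {
if _, err := unix.Write(fd, []byte{'A'}); err != nil {
return err
}
if err := appendHole(fd, 1<<20); err != nil { // a 1 MiB hole
return err
}
_, err := unix.Write(fd, []byte{'B'})
return err
}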
func (c *chunkedDiffer) appendCompressedStreamToFile(compression compressedFileType, destFile *destinationFile, size int64) error {
switch compression {
case fileTypeZstdChunked:
defer c.zstdReader.Reset(nil)
defer func() {
if err := c.zstdReader.Reset(nil); err != nil {
logrus.Warnf("release of references to the previous zstd reader failed: %v", err)
}
}()
if _, err := io.CopyBuffer(destFile.to, io.LimitReader(c.zstdReader, size), c.copyBuffer); err != nil {
return err
}
@ -948,7 +575,7 @@ func (c *chunkedDiffer) appendCompressedStreamToFile(compression compressedFileT
return err
}
case fileTypeHole:
if err := appendHole(int(destFile.file.Fd()), size); err != nil {
if err := appendHole(int(destFile.file.Fd()), destFile.metadata.Name, size); err != nil {
return err
}
if destFile.hash != nil {
@ -977,7 +604,7 @@ type destinationFile struct {
}
func openDestinationFile(dirfd int, metadata *fileMetadata, options *archive.TarOptions, skipValidation bool, recordFsVerity recordFsVerityFunc) (*destinationFile, error) {
file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
file, err := openFileUnderRoot(dirfd, metadata.Name, newFileFlags, 0)
if err != nil {
return nil, err
}
@ -1080,7 +707,7 @@ func (c *chunkedDiffer) recordFsVerity(path string, roFile *os.File) error {
return nil
}
func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingParts []missingPart, options *archive.TarOptions) (Err error) {
func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan error, dirfd int, missingParts []missingPart, options *archive.TarOptions) (Err error) {
var destFile *destinationFile
filesToClose := make(chan *destinationFile, 3)
@ -1294,7 +921,7 @@ func mergeMissingChunks(missingParts []missingPart, target int) []missingPart {
return newMissingParts
}
func (c *chunkedDiffer) retrieveMissingFiles(stream ImageSourceSeekable, dest string, dirfd int, missingParts []missingPart, options *archive.TarOptions) error {
func (c *chunkedDiffer) retrieveMissingFiles(stream ImageSourceSeekable, dirfd int, missingParts []missingPart, options *archive.TarOptions) error {
var chunksToRequest []ImageSourceChunk
calculateChunksToRequest := func() {
@ -1333,167 +960,12 @@ func (c *chunkedDiffer) retrieveMissingFiles(stream ImageSourceSeekable, dest st
return err
}
if err := c.storeMissingFiles(streams, errs, dest, dirfd, missingParts, options); err != nil {
if err := c.storeMissingFiles(streams, errs, dirfd, missingParts, options); err != nil {
return err
}
return nil
}
func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *fileMetadata, options *archive.TarOptions) error {
parent := filepath.Dir(name)
base := filepath.Base(name)
parentFd := dirfd
if parent != "." {
parentFile, err := openOrCreateDirUnderRoot(parent, dirfd, 0)
if err != nil {
return err
}
defer parentFile.Close()
parentFd = int(parentFile.Fd())
}
if err := unix.Mkdirat(parentFd, base, uint32(mode)); err != nil {
if !os.IsExist(err) {
return fmt.Errorf("mkdir %q: %w", name, err)
}
}
file, err := openFileUnderRoot(base, parentFd, unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return err
}
defer file.Close()
return setFileAttrs(dirfd, file, mode, metadata, options, false)
}
func safeLink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
sourceFile, err := openFileUnderRoot(metadata.Linkname, dirfd, unix.O_PATH|unix.O_RDONLY|unix.O_NOFOLLOW, 0)
if err != nil {
return err
}
defer sourceFile.Close()
destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
destDirFd := dirfd
if destDir != "." {
f, err := openOrCreateDirUnderRoot(destDir, dirfd, 0)
if err != nil {
return err
}
defer f.Close()
destDirFd = int(f.Fd())
}
err = doHardLink(int(sourceFile.Fd()), destDirFd, destBase)
if err != nil {
return fmt.Errorf("create hardlink %q pointing to %q: %w", metadata.Name, metadata.Linkname, err)
}
newFile, err := openFileUnderRoot(metadata.Name, dirfd, unix.O_WRONLY|unix.O_NOFOLLOW, 0)
if err != nil {
// If the target is a symlink, open the file with O_PATH.
if errors.Is(err, unix.ELOOP) {
newFile, err := openFileUnderRoot(metadata.Name, dirfd, unix.O_PATH|unix.O_NOFOLLOW, 0)
if err != nil {
return err
}
defer newFile.Close()
return setFileAttrs(dirfd, newFile, mode, metadata, options, true)
}
return err
}
defer newFile.Close()
return setFileAttrs(dirfd, newFile, mode, metadata, options, false)
}
func safeSymlink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
destDirFd := dirfd
if destDir != "." {
f, err := openOrCreateDirUnderRoot(destDir, dirfd, 0)
if err != nil {
return err
}
defer f.Close()
destDirFd = int(f.Fd())
}
if err := unix.Symlinkat(metadata.Linkname, destDirFd, destBase); err != nil {
return fmt.Errorf("create symlink %q pointing to %q: %w", metadata.Name, metadata.Linkname, err)
}
return nil
}
type whiteoutHandler struct {
Dirfd int
Root string
}
func (d whiteoutHandler) Setxattr(path, name string, value []byte) error {
file, err := openOrCreateDirUnderRoot(path, d.Dirfd, 0)
if err != nil {
return err
}
defer file.Close()
if err := unix.Fsetxattr(int(file.Fd()), name, value, 0); err != nil {
return fmt.Errorf("set xattr %s=%q for %q: %w", name, value, path, err)
}
return nil
}
func (d whiteoutHandler) Mknod(path string, mode uint32, dev int) error {
dir := filepath.Dir(path)
base := filepath.Base(path)
dirfd := d.Dirfd
if dir != "" {
dir, err := openOrCreateDirUnderRoot(dir, d.Dirfd, 0)
if err != nil {
return err
}
defer dir.Close()
dirfd = int(dir.Fd())
}
if err := unix.Mknodat(dirfd, base, mode, dev); err != nil {
return fmt.Errorf("mknod %q: %w", path, err)
}
return nil
}
func checkChownErr(err error, name string, uid, gid int) error {
if errors.Is(err, syscall.EINVAL) {
return fmt.Errorf(`potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid if configured locally and run "podman system migrate": %w`, uid, gid, name, err)
}
return err
}
func (d whiteoutHandler) Chown(path string, uid, gid int) error {
file, err := openFileUnderRoot(path, d.Dirfd, unix.O_PATH, 0)
if err != nil {
return err
}
defer file.Close()
if err := unix.Fchownat(int(file.Fd()), "", uid, gid, unix.AT_EMPTY_PATH); err != nil {
var stat unix.Stat_t
if unix.Fstat(int(file.Fd()), &stat) == nil {
if stat.Uid == uint32(uid) && stat.Gid == uint32(gid) {
return nil
}
}
return checkChownErr(err, path, uid, gid)
}
return nil
}
type hardLinkToCreate struct {
dest string
dirfd int
@ -1501,8 +973,8 @@ type hardLinkToCreate struct {
metadata *fileMetadata
}
func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bool) bool {
if value, ok := storeOpts.PullOptions[name]; ok {
func parseBooleanPullOption(pullOptions map[string]string, name string, def bool) bool {
if value, ok := pullOptions[name]; ok {
return strings.ToLower(value) == "true"
}
return def
@ -1515,10 +987,10 @@ type findAndCopyFileOptions struct {
}
func reopenFileReadOnly(f *os.File) (*os.File, error) {
path := fmt.Sprintf("/proc/self/fd/%d", f.Fd())
path := procPathForFile(f)
fd, err := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
return nil, &fs.PathError{Op: "open", Path: path, Err: err}
}
return os.NewFile(uintptr(fd), f.Name()), nil
}
@ -1636,7 +1108,7 @@ func (c *chunkedDiffer) copyAllBlobToFile(destination *os.File) (digest.Digest,
r := io.TeeReader(payload, originalRawDigester.Hash())
// copy the entire tarball and compute its digest
_, err = io.Copy(destination, r)
_, err = io.CopyBuffer(destination, r, c.copyBuffer)
return originalRawDigester.Digest(), err
}
@ -1654,13 +1126,14 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
// stream to use for reading the zstd:chunked or Estargz file.
stream := c.stream
var compressedDigest digest.Digest
var uncompressedDigest digest.Digest
var convertedBlobSize int64
if c.convertToZstdChunked {
fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
return graphdriver.DriverWithDifferOutput{}, &fs.PathError{Op: "open", Path: dest, Err: err}
}
blobFile := os.NewFile(uintptr(fd), "blob-file")
defer func() {
@ -1670,7 +1143,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}()
// calculate the checksum before accessing the file.
compressedDigest, err := c.copyAllBlobToFile(blobFile)
compressedDigest, err = c.copyAllBlobToFile(blobFile)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
@ -1683,7 +1156,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
return graphdriver.DriverWithDifferOutput{}, err
}
tarSize, fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, blobFile)
tarSize, fileSource, diffID, annotations, err := c.convertTarToZstdChunked(dest, blobFile)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
@ -1756,14 +1229,15 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
},
TOCDigest: c.tocDigest,
UncompressedDigest: uncompressedDigest,
CompressedDigest: compressedDigest,
}
// When hard-link deduplication is used, file attributes are ignored because setting them
// would modify the source file as well.
useHardLinks := parseBooleanPullOption(c.storeOpts, "use_hard_links", false)
useHardLinks := parseBooleanPullOption(c.pullOptions, "use_hard_links", false)
// List of OSTree repositories to use for deduplication
ostreeRepos := strings.Split(c.storeOpts.PullOptions["ostree_repos"], ":")
ostreeRepos := strings.Split(c.pullOptions["ostree_repos"], ":")
whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData)
@ -1790,16 +1264,19 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
if options.ForceMask != nil {
uid, gid, mode, err := archive.GetFileOwner(dest)
if err == nil {
value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode)
if err := unix.Setxattr(dest, containersOverrideXattr, []byte(value), 0); err != nil {
value := idtools.Stat{
IDs: idtools.IDPair{UID: int(uid), GID: int(gid)},
Mode: os.FileMode(mode),
}
if err := idtools.SetContainersOverrideXattr(dest, value); err != nil {
return output, err
}
}
}
dirfd, err := unix.Open(dest, unix.O_RDONLY|unix.O_PATH, 0)
dirfd, err := unix.Open(dest, unix.O_RDONLY|unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return output, fmt.Errorf("cannot open %q: %w", dest, err)
return output, &fs.PathError{Op: "open", Path: dest, Err: err}
}
defer unix.Close(dirfd)
@ -1812,7 +1289,9 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
for _, e := range mergedEntries {
d := e.Name[0:2]
if _, found := createdDirs[d]; !found {
unix.Mkdirat(dirfd, d, 0o755)
if err := unix.Mkdirat(dirfd, d, 0o755); err != nil {
return output, &fs.PathError{Op: "mkdirat", Path: d, Err: err}
}
createdDirs[d] = struct{}{}
}
}
@ -1868,11 +1347,14 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}
filesToWaitFor := 0
for i, r := range mergedEntries {
for i := range mergedEntries {
r := &mergedEntries[i]
if options.ForceMask != nil {
value := fmt.Sprintf("%d:%d:0%o", r.UID, r.GID, r.Mode&0o7777)
r.Xattrs[containersOverrideXattr] = base64.StdEncoding.EncodeToString([]byte(value))
r.Mode = int64(*options.ForceMask)
value := idtools.FormatContainersOverrideXattr(r.UID, r.GID, int(r.Mode))
if r.Xattrs == nil {
r.Xattrs = make(map[string]string)
}
r.Xattrs[idtools.ContainersOverrideXattr] = base64.StdEncoding.EncodeToString([]byte(value))
}
mode := os.FileMode(r.Mode)
@ -1916,12 +1398,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
if r.Size == 0 {
// Use a closure to get a scope for the deferred cleanup.
createEmptyFile := func() error {
file, err := openFileUnderRoot(r.Name, dirfd, newFileFlags, 0)
file, err := openFileUnderRoot(dirfd, r.Name, newFileFlags, 0)
if err != nil {
return err
}
defer file.Close()
if err := setFileAttrs(dirfd, file, mode, &r, options, false); err != nil {
if err := setFileAttrs(dirfd, file, mode, r, options, false); err != nil {
return err
}
return nil
@ -1936,7 +1418,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
if r.Name == "" || r.Name == "." {
output.RootDirMode = &mode
}
if err := safeMkdir(dirfd, mode, r.Name, &r, options); err != nil {
if err := safeMkdir(dirfd, mode, r.Name, r, options); err != nil {
return output, err
}
continue
@ -1950,12 +1432,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
dest: dest,
dirfd: dirfd,
mode: mode,
metadata: &r,
metadata: r,
})
continue
case tar.TypeSymlink:
if err := safeSymlink(dirfd, mode, &r, options); err != nil {
if err := safeSymlink(dirfd, r); err != nil {
return output, err
}
continue
@ -2057,7 +1539,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}
// There are some missing files. Prepare a multirange request for the missing chunks.
if len(missingParts) > 0 {
if err := c.retrieveMissingFiles(stream, dest, dirfd, missingParts, options); err != nil {
if err := c.retrieveMissingFiles(stream, dirfd, missingParts, options); err != nil {
return output, err
}
}
@ -2167,13 +1649,13 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
// validateChunkChecksum checks if the file at $root/$path[offset:chunk.ChunkSize] has the
// same digest as chunk.ChunkDigest
func validateChunkChecksum(chunk *internal.FileMetadata, root, path string, offset int64, copyBuffer []byte) bool {
parentDirfd, err := unix.Open(root, unix.O_PATH, 0)
parentDirfd, err := unix.Open(root, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return false
}
defer unix.Close(parentDirfd)
fd, err := openFileUnderRoot(path, parentDirfd, unix.O_RDONLY, 0)
fd, err := openFileUnderRoot(parentDirfd, path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return false
}

@ -0,0 +1,68 @@
package chunked
import (
"bytes"
"fmt"
"io"
"github.com/vbatts/tar-split/archive/tar"
"github.com/vbatts/tar-split/tar/storage"
)
// iterateTarSplit calls handler for each tar header in tarSplit
func iterateTarSplit(tarSplit []byte, handler func(hdr *tar.Header) error) error {
// This, strictly speaking, hard-codes undocumented assumptions about how github.com/vbatts/tar-split/tar/asm.NewInputTarStream
// forms the tar-split contents. Pragmatically, NewInputTarStream should always produce storage.FileType entries at least
// for every non-empty file, which basically constrains it to the output we expect.
//
// Specifically, we assume:
// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header including any extensions
// - (There is a FileType entry for every tar header, we ignore it)
// - Trailing padding of a file, if any, is included in the next SegmentType entry
// - At the end, there may be SegmentType entries just for the terminating zero blocks.
unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
for {
tsEntry, err := unpacker.Next()
if err != nil {
if err == io.EOF {
return nil
}
return fmt.Errorf("reading tar-split entries: %w", err)
}
switch tsEntry.Type {
case storage.SegmentType:
payload := tsEntry.Payload
// This is horrible, but we don't know how much padding to skip. (It can be computed from the previous hdr.Size for non-sparse
// files, but for sparse files that is set to the logical size.)
//
// First, assume that all padding is zero bytes.
// A tar header starts with a file name, which might in principle be empty, but
// at least https://github.com/opencontainers/image-spec/blob/main/layer.md#populate-initial-filesystem suggests that
// the tar name should never be empty (it should be ".", or maybe "./").
//
// This will cause us to skip all zero bytes in the trailing blocks, but that's fine.
i := 0
for i < len(payload) && payload[i] == 0 {
i++
}
payload = payload[i:]
tr := tar.NewReader(bytes.NewReader(payload))
hdr, err := tr.Next()
if err != nil {
if err == io.EOF { // Probably the last entry, but let's let the unpacker drive that.
break
}
return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err)
}
if err := handler(hdr); err != nil {
return err
}
case storage.FileType:
// Nothing
default:
return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type)
}
}
}
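// Hedged usage sketch (tarSplitNames is illustrative): collect the file names
// recorded in a tar-split stream, e.g. one recovered from a zstd:chunked manifest.
func tarSplitNames(tarSplit []byte) ([]string, error) {
var names []string
err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error {
names = append(names, hdr.Name)
return nil
})
return names, err
}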

@ -75,10 +75,6 @@ type OptionsConfig struct {
// Size
Size string `toml:"size,omitempty"`
// RemapUIDs is a list of default UID mappings to use for layers.
RemapUIDs string `toml:"remap-uids,omitempty"`
// RemapGIDs is a list of default GID mappings to use for layers.
RemapGIDs string `toml:"remap-gids,omitempty"`
// IgnoreChownErrors is a flag for whether chown errors should be
// ignored when building an image.
IgnoreChownErrors string `toml:"ignore_chown_errors,omitempty"`
@ -90,13 +86,6 @@ type OptionsConfig struct {
// files and directories.
ForceMask os.FileMode `toml:"force_mask,omitempty"`
// RemapUser is the name of one or more entries in /etc/subuid which
// should be used to set up default UID mappings.
RemapUser string `toml:"remap-user,omitempty"`
// RemapGroup is the name of one or more entries in /etc/subgid which
// should be used to set up default GID mappings.
RemapGroup string `toml:"remap-group,omitempty"`
// RootAutoUsernsUser is the name of one or more entries in /etc/subuid and
// /etc/subgid which should be used to set up automatically a userns.
RootAutoUsernsUser string `toml:"root-auto-userns-user,omitempty"`

@ -1,5 +1,5 @@
//go:build linux || darwin || freebsd || solaris
// +build linux darwin freebsd solaris
//go:build !windows
// +build !windows
package directory

@ -1,38 +0,0 @@
//go:build !linux && !darwin && !freebsd && !windows
// +build !linux,!darwin,!freebsd,!windows
package homedir
// Copyright 2013-2018 Docker, Inc.
// NOTE: this package has originally been copied from github.com/docker/docker.
import (
"errors"
"os"
"path/filepath"
)
// GetRuntimeDir is unsupported on non-linux system.
func GetRuntimeDir() (string, error) {
return "", errors.New("homedir.GetRuntimeDir() is not supported on this system")
}
// StickRuntimeDirContents is unsupported on non-linux system.
func StickRuntimeDirContents(files []string) ([]string, error) {
return nil, errors.New("homedir.StickRuntimeDirContents() is not supported on this system")
}
// GetConfigHome returns XDG_CONFIG_HOME.
// GetConfigHome returns $HOME/.config and nil error if XDG_CONFIG_HOME is not set.
//
// See also https://standards.freedesktop.org/basedir-spec/latest/ar01s03.html
func GetConfigHome() (string, error) {
if xdgConfigHome := os.Getenv("XDG_CONFIG_HOME"); xdgConfigHome != "" {
return xdgConfigHome, nil
}
home := Get()
if home == "" {
return "", errors.New("could not get either XDG_CONFIG_HOME or HOME")
}
return filepath.Join(home, ".config"), nil
}

@ -10,6 +10,7 @@ import (
"syscall"
"github.com/containers/storage/pkg/idtools"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@ -61,12 +62,20 @@ func CreateUsernsProcess(uidMaps []idtools.IDMap, gidMaps []idtools.IDMap) (int,
_ = unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0)
// just wait for the SIGKILL
for {
syscall.Pause()
_ = syscall.Pause()
}
}
cleanupFunc := func() {
unix.Kill(int(pid), unix.SIGKILL)
_, _ = unix.Wait4(int(pid), nil, 0, nil)
err1 := unix.Kill(int(pid), unix.SIGKILL)
if err1 != nil && err1 != syscall.ESRCH {
logrus.Warnf("kill process pid: %d with SIGKILL ended with error: %v", int(pid), err1)
}
if err1 != nil {
return
}
if _, err := unix.Wait4(int(pid), nil, 0, nil); err != nil {
logrus.Warnf("wait4 pid: %d ended with error: %v", int(pid), err)
}
}
writeMappings := func(fname string, idmap []idtools.IDMap) error {
mappings := ""

@ -367,21 +367,77 @@ func checkChownErr(err error, name string, uid, gid int) error {
return err
}
// Stat contains file states that can be overridden with ContainersOverrideXattr.
type Stat struct {
IDs IDPair
Mode os.FileMode
}
// FormatContainersOverrideXattr will format the given uid, gid, and mode into a string
// that can be used as the value for the ContainersOverrideXattr xattr.
func FormatContainersOverrideXattr(uid, gid, mode int) string {
return fmt.Sprintf("%d:%d:0%o", uid, gid, mode&0o7777)
}
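// For illustration (hedged): FormatContainersOverrideXattr(1000, 1000, 0o644)
// returns "1000:1000:0644", i.e. "uid:gid:0<octal mode>", the layout that
// GetContainersOverrideXattr parses below.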
// GetContainersOverrideXattr will get and decode ContainersOverrideXattr.
func GetContainersOverrideXattr(path string) (Stat, error) {
var stat Stat
xstat, err := system.Lgetxattr(path, ContainersOverrideXattr)
if err != nil {
return stat, err
}
attrs := strings.Split(string(xstat), ":")
if len(attrs) != 3 {
return stat, fmt.Errorf("The number of clons in %s does not equal to 3",
ContainersOverrideXattr)
}
value, err := strconv.ParseUint(attrs[0], 10, 32)
if err != nil {
return stat, fmt.Errorf("Failed to parse UID: %w", err)
}
stat.IDs.UID = int(value)
value, err = strconv.ParseUint(attrs[1], 10, 32)
if err != nil {
return stat, fmt.Errorf("Failed to parse GID: %w", err)
}
stat.IDs.GID = int(value)
value, err = strconv.ParseUint(attrs[2], 8, 32)
if err != nil {
return stat, fmt.Errorf("Failed to parse mode: %w", err)
}
stat.Mode = os.FileMode(value)
return stat, nil
}
// SetContainersOverrideXattr will encode and set ContainersOverrideXattr.
func SetContainersOverrideXattr(path string, stat Stat) error {
value := FormatContainersOverrideXattr(stat.IDs.UID, stat.IDs.GID, int(stat.Mode))
return system.Lsetxattr(path, ContainersOverrideXattr, []byte(value), 0)
}
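// Hedged round-trip sketch (overrideRoundTrip and path are illustrative): record
// ownership and mode in the override xattr, then decode it again.
func overrideRoundTrip(path string) (Stat, error) {
want := Stat{IDs: IDPair{UID: 0, GID: 0}, Mode: 0o755}
if err := SetContainersOverrideXattr(path, want); err != nil {
return Stat{}, err
}
return GetContainersOverrideXattr(path)
}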
func SafeChown(name string, uid, gid int) error {
if runtime.GOOS == "darwin" {
var mode uint64 = 0o0700
var mode os.FileMode = 0o0700
xstat, err := system.Lgetxattr(name, ContainersOverrideXattr)
if err == nil {
attrs := strings.Split(string(xstat), ":")
if len(attrs) == 3 {
val, err := strconv.ParseUint(attrs[2], 8, 32)
if err == nil {
mode = val
mode = os.FileMode(val)
}
}
}
value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode)
if err = system.Lsetxattr(name, ContainersOverrideXattr, []byte(value), 0); err != nil {
value := Stat{IDPair{uid, gid}, mode}
if err = SetContainersOverrideXattr(name, value); err != nil {
return err
}
uid = os.Getuid()
@ -397,19 +453,19 @@ func SafeChown(name string, uid, gid int) error {
func SafeLchown(name string, uid, gid int) error {
if runtime.GOOS == "darwin" {
var mode uint64 = 0o0700
var mode os.FileMode = 0o0700
xstat, err := system.Lgetxattr(name, ContainersOverrideXattr)
if err == nil {
attrs := strings.Split(string(xstat), ":")
if len(attrs) == 3 {
val, err := strconv.ParseUint(attrs[2], 8, 32)
if err == nil {
mode = val
mode = os.FileMode(val)
}
}
}
value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode)
if err = system.Lsetxattr(name, ContainersOverrideXattr, []byte(value), 0); err != nil {
value := Stat{IDPair{uid, gid}, mode}
if err = SetContainersOverrideXattr(name, value); err != nil {
return err
}
uid = os.Getuid()

@ -150,10 +150,13 @@ func (w *atomicFileWriter) complete(commit bool) (retErr error) {
}
defer func() {
w.closeTempFile()
err := w.closeTempFile()
if retErr != nil || w.writeErr != nil {
os.Remove(w.f.Name())
}
if retErr == nil {
retErr = err
}
}()
if commit {

@ -415,7 +415,9 @@ func (l *LockFile) lock(lType lockType) {
// Optimization: only use the (expensive) syscall when
// the counter is 0. In this case, we're either the first
// reader lock or a writer lock.
lockHandle(l.fd, lType, false)
if err := lockHandle(l.fd, lType, false); err != nil {
panic(err)
}
}
l.lockType = lType
l.locked = true
@ -426,10 +428,13 @@ func (l *LockFile) lock(lType lockType) {
// command.
func (l *LockFile) tryLock(lType lockType) error {
var success bool
var rwMutexUnlocker func()
if lType == readLock {
success = l.rwMutex.TryRLock()
rwMutexUnlocker = l.rwMutex.RUnlock
} else {
success = l.rwMutex.TryLock()
rwMutexUnlocker = l.rwMutex.Unlock
}
if !success {
return fmt.Errorf("resource temporarily unavailable")
@ -440,7 +445,7 @@ func (l *LockFile) tryLock(lType lockType) error {
// If we're the first reference on the lock, we need to open the file again.
fd, err := openLock(l.file, l.ro)
if err != nil {
l.rwMutex.Unlock()
rwMutexUnlocker()
return err
}
l.fd = fd
@ -450,7 +455,7 @@ func (l *LockFile) tryLock(lType lockType) error {
// reader lock or a writer lock.
if err = lockHandle(l.fd, lType, true); err != nil {
closeHandle(fd)
l.rwMutex.Unlock()
rwMutexUnlocker()
return err
}
}

@ -1,5 +1,5 @@
//go:build linux || solaris || darwin || freebsd
// +build linux solaris darwin freebsd
//go:build !windows
// +build !windows
package lockfile

@ -6,10 +6,12 @@ package loopback
import (
"errors"
"fmt"
"io/fs"
"os"
"syscall"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
// Loopback related errors
@ -39,7 +41,7 @@ func getNextFreeLoopbackIndex() (int, error) {
return index, err
}
func openNextAvailableLoopback(index int, sparseName string, sparseFile *os.File) (loopFile *os.File, err error) {
func openNextAvailableLoopback(sparseName string, sparseFile *os.File) (loopFile *os.File, err error) {
// Read information about the loopback file.
var st syscall.Stat_t
err = syscall.Fstat(int(sparseFile.Fd()), &st)
@ -48,31 +50,51 @@ func openNextAvailableLoopback(index int, sparseName string, sparseFile *os.File
return nil, ErrAttachLoopbackDevice
}
// upper bound to avoid infinite loop
remaining := 1000
// Start looking for a free /dev/loop
for {
target := fmt.Sprintf("/dev/loop%d", index)
index++
fi, err := os.Stat(target)
if err != nil {
if os.IsNotExist(err) {
logrus.Error("There are no more loopback devices available.")
}
if remaining == 0 {
logrus.Errorf("No free loopback devices available")
return nil, ErrAttachLoopbackDevice
}
remaining--
if fi.Mode()&os.ModeDevice != os.ModeDevice {
logrus.Errorf("Loopback device %s is not a block device.", target)
continue
index, err := getNextFreeLoopbackIndex()
if err != nil {
logrus.Debugf("Error retrieving the next available loopback: %s", err)
return nil, err
}
target := fmt.Sprintf("/dev/loop%d", index)
// OpenFile adds O_CLOEXEC
loopFile, err = os.OpenFile(target, os.O_RDWR, 0o644)
if err != nil {
// The kernel returns ENXIO when opening a device that is in the "deleting" or "rundown" state, so
// just treat ENXIO as if the device does not exist.
if errors.Is(err, fs.ErrNotExist) || errors.Is(err, unix.ENXIO) {
// Another process could have taken the loopback device in the meantime. So repeat
// the process with the next loopback device.
continue
}
logrus.Errorf("Opening loopback device: %s", err)
return nil, ErrAttachLoopbackDevice
}
fi, err := loopFile.Stat()
if err != nil {
loopFile.Close()
logrus.Errorf("Stat loopback device: %s", err)
return nil, ErrAttachLoopbackDevice
}
if fi.Mode()&os.ModeDevice != os.ModeDevice {
loopFile.Close()
logrus.Errorf("Loopback device %s is not a block device.", target)
continue
}
// Try to attach to the loop file
if err := ioctlLoopSetFd(loopFile.Fd(), sparseFile.Fd()); err != nil {
loopFile.Close()
@ -124,14 +146,6 @@ func AttachLoopDeviceRO(sparseName string) (loop *os.File, err error) {
}
func attachLoopDevice(sparseName string, readonly bool) (loop *os.File, err error) {
// Try to retrieve the next available loopback device via syscall.
// If it fails, we discard error and start looping for a
// loopback from index 0.
startIndex, err := getNextFreeLoopbackIndex()
if err != nil {
logrus.Debugf("Error retrieving the next available loopback: %s", err)
}
var sparseFile *os.File
// OpenFile adds O_CLOEXEC
@ -146,7 +160,7 @@ func attachLoopDevice(sparseName string, readonly bool) (loop *os.File, err erro
}
defer sparseFile.Close()
loopFile, err := openNextAvailableLoopback(startIndex, sparseName, sparseFile)
loopFile, err := openNextAvailableLoopback(sparseName, sparseFile)
if err != nil {
return nil, err
}

@ -1,5 +1,18 @@
package mount
import "github.com/moby/sys/mountinfo"
import (
"fmt"
"os"

"github.com/moby/sys/mountinfo"
)

var PidMountInfo = mountinfo.PidMountInfo
func PidMountInfo(pid int) ([]*Info, error) {
f, err := os.Open(fmt.Sprintf("/proc/%d/mountinfo", pid))
if err != nil {
return nil, err
}
defer f.Close()
return mountinfo.GetMountsFromReader(f, nil)
}
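// Hedged usage sketch (selfMountpoints is illustrative): list the mount points
// visible to the current process.
func selfMountpoints() ([]string, error) {
infos, err := PidMountInfo(os.Getpid())
if err != nil {
return nil, err
}
mounts := make([]string, 0, len(infos))
for _, info := range infos {
mounts = append(mounts, info.Mountpoint)
}
return mounts, nil
}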

@ -1,5 +1,5 @@
//go:build linux || freebsd || darwin
// +build linux freebsd darwin
//go:build !windows
// +build !windows
package system

@ -526,8 +526,11 @@ func MaybeReexecUsingUserNamespace(evenForRoot bool) {
} else {
// If we have CAP_SYS_ADMIN, then we don't need to create a new namespace in order to be able
// to use unshare(), so don't bother creating a new user namespace at this point.
capabilities, err := capability.NewPid(0)
capabilities, err := capability.NewPid2(0)
bailOnError(err, "Initializing a new Capabilities object of pid 0")
err = capabilities.Load()
bailOnError(err, "Reading the current capabilities sets")
if capabilities.Get(capability.EFFECTIVE, capability.CAP_SYS_ADMIN) {
return
}
@ -587,7 +590,12 @@ func MaybeReexecUsingUserNamespace(evenForRoot bool) {
cmd.Hook = func(int) error {
go func() {
for receivedSignal := range interrupted {
cmd.Cmd.Process.Signal(receivedSignal)
if err := cmd.Cmd.Process.Signal(receivedSignal); err != nil {
logrus.Warnf(
"Failed to send a signal '%d' to the Process (PID: %d): %v",
receivedSignal, cmd.Cmd.Process.Pid, err,
)
}
}
}()
return nil