go.mod: bump osbuild/images to 0.55

This commit is contained in:
Sanne Raymaekers 2024-04-13 15:47:23 +02:00
parent eab44ca8a8
commit 22140aa7c9
700 changed files with 30353 additions and 27556 deletions

View file

@ -1,5 +1,4 @@
//go:build !linux
// +build !linux
package retry

View file

@ -23,9 +23,9 @@ var (
// compressionBufferSize is the buffer size used to compress a blob
compressionBufferSize = 1048576
// expectedCompressionFormats is used to check if a blob with a specified media type is compressed
// expectedBaseCompressionFormats is used to check if a blob with a specified media type is compressed
// using the algorithm that the media type says it should be compressed with
expectedCompressionFormats = map[string]*compressiontypes.Algorithm{
expectedBaseCompressionFormats = map[string]*compressiontypes.Algorithm{
imgspecv1.MediaTypeImageLayerGzip: &compression.Gzip,
imgspecv1.MediaTypeImageLayerZstd: &compression.Zstd,
manifest.DockerV2Schema2LayerMediaType: &compression.Gzip,
@ -62,15 +62,16 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI
res.srcCompressorName = internalblobinfocache.Uncompressed
}
if expectedFormat, known := expectedCompressionFormats[stream.info.MediaType]; known && res.isCompressed && format.Name() != expectedFormat.Name() {
logrus.Debugf("blob %s with type %s should be compressed with %s, but compressor appears to be %s", srcInfo.Digest.String(), srcInfo.MediaType, expectedFormat.Name(), format.Name())
if expectedBaseFormat, known := expectedBaseCompressionFormats[stream.info.MediaType]; known && res.isCompressed && format.BaseVariantName() != expectedBaseFormat.Name() {
logrus.Debugf("blob %s with type %s should be compressed with %s, but compressor appears to be %s", srcInfo.Digest.String(), srcInfo.MediaType, expectedBaseFormat.Name(), format.Name())
}
return res, nil
}
// bpCompressionStepData contains data that the copy pipeline needs about the compression step.
type bpCompressionStepData struct {
operation types.LayerCompression // Operation to use for updating the blob metadata.
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorName string // Compressor name to record in the blob info cache for the source blob.
@ -78,6 +79,18 @@ type bpCompressionStepData struct {
closers []io.Closer // Objects to close after the upload is done, if any.
}
type bpcOperation int
const (
bpcOpInvalid bpcOperation = iota
bpcOpPreserveOpaque // We are preserving something where compression is not applicable
bpcOpPreserveCompressed // We are preserving a compressed, and decompressible, layer
bpcOpPreserveUncompressed // We are preserving an uncompressed, and compressible, layer
bpcOpCompressUncompressed // We are compressing uncompressed data
bpcOpRecompressCompressed // We are recompressing compressed data
bpcOpDecompressCompressed // We are decompressing compressed data
)
// blobPipelineCompressionStep updates *stream to compress and/or decompress it.
// srcInfo is primarily used for error messages.
// Returns data for other steps; the caller should eventually call updateCompressionEdits and perhaps recordValidatedBlobData,
@ -112,10 +125,11 @@ func (ic *imageCopier) blobPipelineCompressionStep(stream *sourceStream, canModi
// bpcPreserveEncrypted checks if the input is encrypted, and returns a *bpCompressionStepData if so.
func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectCompressionStepData) (*bpCompressionStepData, error) {
if isOciEncrypted(stream.info.MediaType) {
// We cant do anything with an encrypted blob unless decrypted.
logrus.Debugf("Using original blob without modification for encrypted blob")
// PreserveOriginal due to any compression not being able to be done on an encrypted blob unless decrypted
return &bpCompressionStepData{
operation: types.PreserveOriginal,
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorName: internalblobinfocache.UnknownCompression,
uploadedCompressorName: internalblobinfocache.UnknownCompression,
@ -143,7 +157,8 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp
Size: -1,
}
return &bpCompressionStepData{
operation: types.Compress,
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorName: detected.srcCompressorName,
@ -157,7 +172,8 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp
// bpcRecompressCompressed checks if we should be recompressing a compressed input to another format, and returns a *bpCompressionStepData if so.
func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bpDetectCompressionStepData) (*bpCompressionStepData, error) {
if ic.c.dest.DesiredLayerCompression() == types.Compress && detected.isCompressed &&
ic.compressionFormat != nil && ic.compressionFormat.Name() != detected.format.Name() {
ic.compressionFormat != nil &&
(ic.compressionFormat.Name() != detected.format.Name() && ic.compressionFormat.Name() != detected.format.BaseVariantName()) {
// When the blob is compressed, but the desired format is different, it first needs to be decompressed and finally
// re-compressed using the desired format.
logrus.Debugf("Blob will be converted")
@ -182,7 +198,8 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp
}
succeeded = true
return &bpCompressionStepData{
operation: types.PreserveOriginal,
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorName: detected.srcCompressorName,
@ -208,7 +225,8 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp
Size: -1,
}
return &bpCompressionStepData{
operation: types.Decompress,
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: internalblobinfocache.Uncompressed,
@ -232,14 +250,26 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom
// But dont touch blobs in objects where we cant change compression,
// so that src.UpdatedImage() doesnt fail; assume that for such blobs
// LayerInfosForCopy() should not be making any changes in the first place.
var bpcOp bpcOperation
var uploadedOp types.LayerCompression
var algorithm *compressiontypes.Algorithm
if layerCompressionChangeSupported && detected.isCompressed {
switch {
case !layerCompressionChangeSupported:
bpcOp = bpcOpPreserveOpaque
uploadedOp = types.PreserveOriginal
algorithm = nil
case detected.isCompressed:
bpcOp = bpcOpPreserveCompressed
uploadedOp = types.PreserveOriginal
algorithm = &detected.format
} else {
default:
bpcOp = bpcOpPreserveUncompressed
uploadedOp = types.Decompress
algorithm = nil
}
return &bpCompressionStepData{
operation: types.PreserveOriginal,
operation: bpcOp,
uploadedOperation: uploadedOp,
uploadedAlgorithm: algorithm,
srcCompressorName: detected.srcCompressorName,
uploadedCompressorName: detected.srcCompressorName,
@ -248,7 +278,7 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom
// updateCompressionEdits sets *operation, *algorithm and updates *annotations, if necessary.
func (d *bpCompressionStepData) updateCompressionEdits(operation *types.LayerCompression, algorithm **compressiontypes.Algorithm, annotations *map[string]string) {
*operation = d.operation
*operation = d.uploadedOperation
// If we can modify the layer's blob, set the desired algorithm for it to be set in the manifest.
*algorithm = d.uploadedAlgorithm
if *annotations == nil {
@ -257,7 +287,8 @@ func (d *bpCompressionStepData) updateCompressionEdits(operation *types.LayerCom
maps.Copy(*annotations, d.uploadedAnnotations)
}
// recordValidatedBlobData updates b.blobInfoCache with data about the created uploadedInfo adnd the original srcInfo.
// recordValidatedBlobData updates b.blobInfoCache with data about the created uploadedInfo (as returned by PutBlob)
// and the original srcInfo (which the caller guarantees has been validated).
// This must ONLY be called if all data has been validated by OUR code, and is not coming from third parties.
func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInfo types.BlobInfo, srcInfo types.BlobInfo,
encryptionStep *bpEncryptionStepData, decryptionStep *bpDecryptionStepData) error {
@ -268,17 +299,26 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
// in the blob info cache (which would probably be necessary for any more complex logic),
// and the simplicity is attractive.
if !encryptionStep.encrypting && !decryptionStep.decrypting {
// If d.operation != types.PreserveOriginal, we now have two reliable digest values:
// If d.operation != bpcOpPreserve*, we now have two reliable digest values:
// srcinfo.Digest describes the pre-d.operation input, verified by digestingReader
// uploadedInfo.Digest describes the post-d.operation output, computed by PutBlob
// (because stream.info.Digest == "", this must have been computed afresh).
// (because we set stream.info.Digest == "", this must have been computed afresh).
switch d.operation {
case types.PreserveOriginal:
break // Do nothing, we have only one digest and we might not have even verified it.
case types.Compress:
case bpcOpPreserveOpaque:
// No useful information
case bpcOpCompressUncompressed:
c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest)
case types.Decompress:
case bpcOpDecompressCompressed:
c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest)
case bpcOpRecompressCompressed, bpcOpPreserveCompressed:
// We know one or two compressed digests. BlobInfoCache associates compression variants via the uncompressed digest,
// and we dont know that one.
// That also means that repeated copies with the same recompression dont identify reuse opportunities (unless
// RecordDigestUncompressedPair was called for both compressed variants for some other reason).
case bpcOpPreserveUncompressed:
c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, srcInfo.Digest)
case bpcOpInvalid:
fallthrough
default:
return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation)
}
@ -286,7 +326,7 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
if d.uploadedCompressorName != "" && d.uploadedCompressorName != internalblobinfocache.UnknownCompression {
if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
// HACK: Dont record zstd:chunked algorithms.
// There is already a similar hack in internal/imagedestination/impl/helpers.BlobMatchesRequiredCompression,
// There is already a similar hack in internal/imagedestination/impl/helpers.CandidateMatchesTryReusingBlobOptions,
// and that one prevents reusing zstd:chunked blobs, so recording the algorithm here would be mostly harmless.
//
// We skip that here anyway to work around the inability of blobPipelineDetectCompressionStep to differentiate

View file

@ -23,11 +23,11 @@ type digestingReader struct {
func newDigestingReader(source io.Reader, expectedDigest digest.Digest) (*digestingReader, error) {
var digester digest.Digester
if err := expectedDigest.Validate(); err != nil {
return nil, fmt.Errorf("Invalid digest specification %s", expectedDigest)
return nil, fmt.Errorf("invalid digest specification %q: %w", expectedDigest, err)
}
digestAlgorithm := expectedDigest.Algorithm()
if !digestAlgorithm.Available() {
return nil, fmt.Errorf("Invalid digest specification %s: unsupported digest algorithm %s", expectedDigest, digestAlgorithm)
return nil, fmt.Errorf("invalid digest specification %q: unsupported digest algorithm %q", expectedDigest, digestAlgorithm)
}
digester = digestAlgorithm.Digester()

View file

@ -6,8 +6,10 @@ import (
"fmt"
"strings"
internalManifest "github.com/containers/image/v5/internal/manifest"
"github.com/containers/image/v5/internal/set"
"github.com/containers/image/v5/manifest"
compressiontypes "github.com/containers/image/v5/pkg/compression/types"
"github.com/containers/image/v5/types"
v1 "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/sirupsen/logrus"
@ -19,8 +21,8 @@ import (
// Include v2s1 signed but not v2s1 unsigned, because docker/distribution requires a signature even if the unsigned MIME type is used.
var preferredManifestMIMETypes = []string{manifest.DockerV2Schema2MediaType, manifest.DockerV2Schema1SignedMediaType}
// ociEncryptionMIMETypes lists manifest MIME types that are known to support OCI encryption.
var ociEncryptionMIMETypes = []string{v1.MediaTypeImageManifest}
// allManifestMIMETypes lists all possible manifest MIME types.
var allManifestMIMETypes = []string{v1.MediaTypeImageManifest, manifest.DockerV2Schema2MediaType, manifest.DockerV2Schema1SignedMediaType, manifest.DockerV2Schema1MediaType}
// orderedSet is a list of strings (MIME types or platform descriptors in our case), with each string appearing at most once.
type orderedSet struct {
@ -51,9 +53,10 @@ type determineManifestConversionInputs struct {
destSupportedManifestMIMETypes []string // MIME types supported by the destination, per types.ImageDestination.SupportedManifestMIMETypes()
forceManifestMIMEType string // Users choice of forced manifest MIME type
requiresOCIEncryption bool // Restrict to manifest formats that can support OCI encryption
cannotModifyManifestReason string // The reason the manifest cannot be modified, or an empty string if it can
forceManifestMIMEType string // Users choice of forced manifest MIME type
requestedCompressionFormat *compressiontypes.Algorithm // Compression algorithm to use, if the user _explictily_ requested one.
requiresOCIEncryption bool // Restrict to manifest formats that can support OCI encryption
cannotModifyManifestReason string // The reason the manifest cannot be modified, or an empty string if it can
}
// manifestConversionPlan contains the decisions made by determineManifestConversion.
@ -79,42 +82,68 @@ func determineManifestConversion(in determineManifestConversionInputs) (manifest
if in.forceManifestMIMEType != "" {
destSupportedManifestMIMETypes = []string{in.forceManifestMIMEType}
}
if len(destSupportedManifestMIMETypes) == 0 {
if !in.requiresOCIEncryption || manifest.MIMETypeSupportsEncryption(srcType) {
return manifestConversionPlan{ // Anything goes; just use the original as is, do not try any conversions.
preferredMIMEType: srcType,
otherMIMETypeCandidates: []string{},
}, nil
}
destSupportedManifestMIMETypes = ociEncryptionMIMETypes
destSupportedManifestMIMETypes = allManifestMIMETypes
}
restrictiveCompressionRequired := in.requestedCompressionFormat != nil && !internalManifest.CompressionAlgorithmIsUniversallySupported(*in.requestedCompressionFormat)
supportedByDest := set.New[string]()
for _, t := range destSupportedManifestMIMETypes {
if !in.requiresOCIEncryption || manifest.MIMETypeSupportsEncryption(t) {
supportedByDest.Add(t)
if in.requiresOCIEncryption && !manifest.MIMETypeSupportsEncryption(t) {
continue
}
if restrictiveCompressionRequired && !internalManifest.MIMETypeSupportsCompressionAlgorithm(t, *in.requestedCompressionFormat) {
continue
}
supportedByDest.Add(t)
}
if supportedByDest.Empty() {
if len(destSupportedManifestMIMETypes) == 0 { // Coverage: This should never happen, empty values were replaced by ociEncryptionMIMETypes
if len(destSupportedManifestMIMETypes) == 0 { // Coverage: This should never happen, empty values were replaced by allManifestMIMETypes
return manifestConversionPlan{}, errors.New("internal error: destSupportedManifestMIMETypes is empty")
}
// We know, and have verified, that destSupportedManifestMIMETypes is not empty, so encryption must have been involved.
if !in.requiresOCIEncryption { // Coverage: This should never happen, destSupportedManifestMIMETypes was not empty, so we should have filtered for encryption.
return manifestConversionPlan{}, errors.New("internal error: supportedByDest is empty but destSupportedManifestMIMETypes is not, and not encrypting")
}
// We know, and have verified, that destSupportedManifestMIMETypes is not empty, so some filtering of supported MIME types must have been involved.
// destSupportedManifestMIMETypes has three possible origins:
if in.forceManifestMIMEType != "" { // 1. forceManifestType specified
return manifestConversionPlan{}, fmt.Errorf("encryption required together with format %s, which does not support encryption",
in.forceManifestMIMEType)
switch {
case in.requiresOCIEncryption && restrictiveCompressionRequired:
return manifestConversionPlan{}, fmt.Errorf("compression using %s, and encryption, required together with format %s, which does not support both",
in.requestedCompressionFormat.Name(), in.forceManifestMIMEType)
case in.requiresOCIEncryption:
return manifestConversionPlan{}, fmt.Errorf("encryption required together with format %s, which does not support encryption",
in.forceManifestMIMEType)
case restrictiveCompressionRequired:
return manifestConversionPlan{}, fmt.Errorf("compression using %s required together with format %s, which does not support it",
in.requestedCompressionFormat.Name(), in.forceManifestMIMEType)
default:
return manifestConversionPlan{}, errors.New("internal error: forceManifestMIMEType was rejected for an unknown reason")
}
}
if len(in.destSupportedManifestMIMETypes) == 0 { // 2. destination accepts anything and we have chosen ociEncryptionMIMETypes
// Coverage: This should never happen, ociEncryptionMIMETypes all support encryption
return manifestConversionPlan{}, errors.New("internal error: in.destSupportedManifestMIMETypes is empty but supportedByDest is empty as well")
if len(in.destSupportedManifestMIMETypes) == 0 { // 2. destination accepts anything and we have chosen allManifestTypes
if !restrictiveCompressionRequired {
// Coverage: This should never happen.
// If we have not rejected for encryption reasons, we must have rejected due to encryption, but
// allManifestTypes includes OCI, which supports encryption.
return manifestConversionPlan{}, errors.New("internal error: in.destSupportedManifestMIMETypes is empty but supportedByDest is empty as well")
}
// This can legitimately happen when the user asks for completely unsupported formats like Bzip2 or Xz.
return manifestConversionPlan{}, fmt.Errorf("compression using %s required, but none of the known manifest formats support it", in.requestedCompressionFormat.Name())
}
// 3. destination accepts a restricted list of mime types
destMIMEList := strings.Join(destSupportedManifestMIMETypes, ", ")
switch {
case in.requiresOCIEncryption && restrictiveCompressionRequired:
return manifestConversionPlan{}, fmt.Errorf("compression using %s, and encryption, required but the destination only supports MIME types [%s], none of which support both",
in.requestedCompressionFormat.Name(), destMIMEList)
case in.requiresOCIEncryption:
return manifestConversionPlan{}, fmt.Errorf("encryption required but the destination only supports MIME types [%s], none of which support encryption",
destMIMEList)
case restrictiveCompressionRequired:
return manifestConversionPlan{}, fmt.Errorf("compression using %s required but the destination only supports MIME types [%s], none of which support it",
in.requestedCompressionFormat.Name(), destMIMEList)
default: // Coverage: This should never happen, we only filter for in.requiresOCIEncryption || restrictiveCompressionRequired
return manifestConversionPlan{}, errors.New("internal error: supportedByDest is empty but destSupportedManifestMIMETypes is not, and we are neither encrypting nor requiring a restrictive compression algorithm")
}
// 3. destination does not support encryption.
return manifestConversionPlan{}, fmt.Errorf("encryption required but the destination only supports MIME types [%s], none of which support encryption",
strings.Join(destSupportedManifestMIMETypes, ", "))
}
// destSupportedManifestMIMETypes is a static guess; a particular registry may still only support a subset of the types.
@ -156,7 +185,7 @@ func determineManifestConversion(in determineManifestConversionInputs) (manifest
}
logrus.Debugf("Manifest has MIME type %s, ordered candidate list [%s]", srcType, strings.Join(prioritizedTypes.list, ", "))
if len(prioritizedTypes.list) == 0 { // Coverage: destSupportedManifestMIMETypes and supportedByDest, which is a subset, is not empty (or we would have exited above), so this should never happen.
if len(prioritizedTypes.list) == 0 { // Coverage: destSupportedManifestMIMETypes and supportedByDest, which is a subset, is not empty (or we would have exited above), so this should never happen.
return manifestConversionPlan{}, errors.New("Internal error: no candidate MIME types")
}
res := manifestConversionPlan{

View file

@ -38,6 +38,7 @@ type instanceCopy struct {
// Fields which can be used by callers when operation
// is `instanceCopyClone`
cloneArtifactType string
cloneCompressionVariant OptionCompressionVariant
clonePlatform *imgspecv1.Platform
cloneAnnotations map[string]string
@ -142,6 +143,7 @@ func prepareInstanceCopies(list internalManifest.List, instanceDigests []digest.
res = append(res, instanceCopy{
op: instanceCopyClone,
sourceDigest: instanceDigest,
cloneArtifactType: instanceDetails.ReadOnly.ArtifactType,
cloneCompressionVariant: compressionVariant,
clonePlatform: instanceDetails.ReadOnly.Platform,
cloneAnnotations: maps.Clone(instanceDetails.ReadOnly.Annotations),
@ -268,6 +270,7 @@ func (c *copier) copyMultipleImages(ctx context.Context) (copiedManifest []byte,
AddDigest: updated.manifestDigest,
AddSize: int64(len(updated.manifest)),
AddMediaType: updated.manifestMIMEType,
AddArtifactType: instance.cloneArtifactType,
AddPlatform: instance.clonePlatform,
AddAnnotations: instance.cloneAnnotations,
AddCompressionAlgorithms: updated.compressionAlgorithms,

View file

@ -4,6 +4,7 @@ import (
"context"
"fmt"
"io"
"time"
"github.com/containers/image/v5/internal/private"
"github.com/containers/image/v5/types"
@ -148,13 +149,14 @@ type blobChunkAccessorProxy struct {
// The readers must be fully consumed, in the order they are returned, before blocking
// to read the next chunk.
func (s *blobChunkAccessorProxy) GetBlobAt(ctx context.Context, info types.BlobInfo, chunks []private.ImageSourceChunk) (chan io.ReadCloser, chan error, error) {
start := time.Now()
rc, errs, err := s.wrapped.GetBlobAt(ctx, info, chunks)
if err == nil {
total := int64(0)
for _, c := range chunks {
total += int64(c.Length)
}
s.bar.IncrInt64(total)
s.bar.EwmaIncrInt64(total, time.Since(start))
}
return rc, errs, err
}

View file

@ -20,6 +20,7 @@ import (
compressiontypes "github.com/containers/image/v5/pkg/compression/types"
"github.com/containers/image/v5/transports"
"github.com/containers/image/v5/types"
chunkedToc "github.com/containers/storage/pkg/chunked/toc"
digest "github.com/opencontainers/go-digest"
imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/sirupsen/logrus"
@ -32,6 +33,7 @@ type imageCopier struct {
c *copier
manifestUpdates *types.ManifestUpdateOptions
src *image.SourcedImage
manifestConversionPlan manifestConversionPlan
diffIDsAreNeeded bool
cannotModifyManifestReason string // The reason the manifest cannot be modified, or an empty string if it can
canSubstituteBlobs bool
@ -135,7 +137,7 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
c: c,
manifestUpdates: &types.ManifestUpdateOptions{InformationOnly: types.ManifestUpdateInformation{Destination: c.dest}},
src: src,
// diffIDsAreNeeded is computed later
// manifestConversionPlan and diffIDsAreNeeded are computed later
cannotModifyManifestReason: cannotModifyManifestReason,
requireCompressionFormatMatch: opts.requireCompressionFormatMatch,
}
@ -163,10 +165,11 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
destRequiresOciEncryption := (isEncrypted(src) && ic.c.options.OciDecryptConfig == nil) || c.options.OciEncryptLayers != nil
manifestConversionPlan, err := determineManifestConversion(determineManifestConversionInputs{
ic.manifestConversionPlan, err = determineManifestConversion(determineManifestConversionInputs{
srcMIMEType: ic.src.ManifestMIMEType,
destSupportedManifestMIMETypes: ic.c.dest.SupportedManifestMIMETypes(),
forceManifestMIMEType: c.options.ForceManifestMIMEType,
requestedCompressionFormat: ic.compressionFormat,
requiresOCIEncryption: destRequiresOciEncryption,
cannotModifyManifestReason: ic.cannotModifyManifestReason,
})
@ -177,8 +180,8 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
// code that calls copyUpdatedConfigAndManifest, so that other parts of the copy code
// (e.g. the UpdatedImageNeedsLayerDiffIDs check just below) can make decisions based
// on the expected destination format.
if manifestConversionPlan.preferredMIMETypeNeedsConversion {
ic.manifestUpdates.ManifestMIMEType = manifestConversionPlan.preferredMIMEType
if ic.manifestConversionPlan.preferredMIMETypeNeedsConversion {
ic.manifestUpdates.ManifestMIMEType = ic.manifestConversionPlan.preferredMIMEType
}
// If src.UpdatedImageNeedsLayerDiffIDs(ic.manifestUpdates) will be true, it needs to be true by the time we get here.
@ -217,11 +220,11 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
manifestBytes, manifestDigest, err := ic.copyUpdatedConfigAndManifest(ctx, targetInstance)
wipResult := copySingleImageResult{
manifest: manifestBytes,
manifestMIMEType: manifestConversionPlan.preferredMIMEType,
manifestMIMEType: ic.manifestConversionPlan.preferredMIMEType,
manifestDigest: manifestDigest,
}
if err != nil {
logrus.Debugf("Writing manifest using preferred type %s failed: %v", manifestConversionPlan.preferredMIMEType, err)
logrus.Debugf("Writing manifest using preferred type %s failed: %v", ic.manifestConversionPlan.preferredMIMEType, err)
// … if it fails, and the failure is either because the manifest is rejected by the registry, or
// because we failed to create a manifest of the specified type because the specific manifest type
// doesn't support the type of compression we're trying to use (e.g. docker v2s2 and zstd), we may
@ -230,13 +233,13 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
var manifestLayerCompressionIncompatibilityError manifest.ManifestLayerCompressionIncompatibilityError
isManifestRejected := errors.As(err, &manifestTypeRejectedError)
isCompressionIncompatible := errors.As(err, &manifestLayerCompressionIncompatibilityError)
if (!isManifestRejected && !isCompressionIncompatible) || len(manifestConversionPlan.otherMIMETypeCandidates) == 0 {
if (!isManifestRejected && !isCompressionIncompatible) || len(ic.manifestConversionPlan.otherMIMETypeCandidates) == 0 {
// We dont have other options.
// In principle the code below would handle this as well, but the resulting error message is fairly ugly.
// Dont bother the user with MIME types if we have no choice.
return copySingleImageResult{}, err
}
// If the original MIME type is acceptable, determineManifestConversion always uses it as manifestConversionPlan.preferredMIMEType.
// If the original MIME type is acceptable, determineManifestConversion always uses it as ic.manifestConversionPlan.preferredMIMEType.
// So if we are here, we will definitely be trying to convert the manifest.
// With ic.cannotModifyManifestReason != "", that would just be a string of repeated failures for the same reason,
// so lets bail out early and with a better error message.
@ -245,8 +248,8 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
}
// errs is a list of errors when trying various manifest types. Also serves as an "upload succeeded" flag when set to nil.
errs := []string{fmt.Sprintf("%s(%v)", manifestConversionPlan.preferredMIMEType, err)}
for _, manifestMIMEType := range manifestConversionPlan.otherMIMETypeCandidates {
errs := []string{fmt.Sprintf("%s(%v)", ic.manifestConversionPlan.preferredMIMEType, err)}
for _, manifestMIMEType := range ic.manifestConversionPlan.otherMIMETypeCandidates {
logrus.Debugf("Trying to use manifest type %s…", manifestMIMEType)
ic.manifestUpdates.ManifestMIMEType = manifestMIMEType
attemptedManifest, attemptedManifestDigest, err := ic.copyUpdatedConfigAndManifest(ctx, targetInstance)
@ -380,7 +383,11 @@ func (ic *imageCopier) compareImageDestinationManifestEqual(ctx context.Context,
compressionAlgos := set.New[string]()
for _, srcInfo := range ic.src.LayerInfos() {
if c := compressionAlgorithmFromMIMEType(srcInfo); c != nil {
_, c, err := compressionEditsFromBlobInfo(srcInfo)
if err != nil {
return nil, err
}
if c != nil {
compressionAlgos.Add(c.Name())
}
}
@ -633,17 +640,29 @@ type diffIDResult struct {
err error
}
func compressionAlgorithmFromMIMEType(srcInfo types.BlobInfo) *compressiontypes.Algorithm {
// compressionEditsFromBlobInfo returns a (CompressionOperation, CompressionAlgorithm) value pair suitable
// for types.BlobInfo.
func compressionEditsFromBlobInfo(srcInfo types.BlobInfo) (types.LayerCompression, *compressiontypes.Algorithm, error) {
// This MIME type → compression mapping belongs in manifest-specific code in our manifest
// package (but we should preferably replace/change UpdatedImage instead of productizing
// this workaround).
switch srcInfo.MediaType {
case manifest.DockerV2Schema2LayerMediaType, imgspecv1.MediaTypeImageLayerGzip:
return &compression.Gzip
return types.PreserveOriginal, &compression.Gzip, nil
case imgspecv1.MediaTypeImageLayerZstd:
return &compression.Zstd
tocDigest, err := chunkedToc.GetTOCDigest(srcInfo.Annotations)
if err != nil {
return types.PreserveOriginal, nil, err
}
if tocDigest != nil {
return types.PreserveOriginal, &compression.ZstdChunked, nil
}
return types.PreserveOriginal, &compression.Zstd, nil
case manifest.DockerV2SchemaLayerMediaTypeUncompressed, imgspecv1.MediaTypeImageLayer:
return types.Decompress, nil, nil
default:
return types.PreserveOriginal, nil, nil
}
return nil
}
// copyLayer copies a layer with srcInfo (with known Digest and Annotations and possibly known Size) in src to dest, perhaps (de/re/)compressing it,
@ -657,8 +676,13 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
// which uses the compression information to compute the updated MediaType values.
// (Sadly UpdatedImage() is documented to not update MediaTypes from
// ManifestUpdateOptions.LayerInfos[].MediaType, so we are doing it indirectly.)
if srcInfo.CompressionAlgorithm == nil {
srcInfo.CompressionAlgorithm = compressionAlgorithmFromMIMEType(srcInfo)
if srcInfo.CompressionOperation == types.PreserveOriginal && srcInfo.CompressionAlgorithm == nil {
op, algo, err := compressionEditsFromBlobInfo(srcInfo)
if err != nil {
return types.BlobInfo{}, "", err
}
srcInfo.CompressionOperation = op
srcInfo.CompressionAlgorithm = algo
}
ic.c.printCopyInfo("blob", srcInfo)
@ -681,26 +705,33 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
logrus.Debugf("Checking if we can reuse blob %s: general substitution = %v, compression for MIME type %q = %v",
srcInfo.Digest, ic.canSubstituteBlobs, srcInfo.MediaType, canChangeLayerCompression)
canSubstitute := ic.canSubstituteBlobs && ic.src.CanChangeLayerCompression(srcInfo.MediaType)
// TODO: at this point we don't know whether or not a blob we end up reusing is compressed using an algorithm
// that is acceptable for use on layers in the manifest that we'll be writing later, so if we end up reusing
// a blob that's compressed with e.g. zstd, but we're only allowed to write a v2s2 manifest, this will cause
// a failure when we eventually try to update the manifest with the digest and MIME type of the reused blob.
// Fixing that will probably require passing more information to TryReusingBlob() than the current version of
// the ImageDestination interface lets us pass in.
var requiredCompression *compressiontypes.Algorithm
var originalCompression *compressiontypes.Algorithm
if ic.requireCompressionFormatMatch {
requiredCompression = ic.compressionFormat
originalCompression = srcInfo.CompressionAlgorithm
}
var tocDigest digest.Digest
// Check if we have a chunked layer in storage that's based on that blob. These layers are stored by their TOC digest.
d, err := chunkedToc.GetTOCDigest(srcInfo.Annotations)
if err != nil {
return types.BlobInfo{}, "", err
}
if d != nil {
tocDigest = *d
}
reused, reusedBlob, err := ic.c.dest.TryReusingBlobWithOptions(ctx, srcInfo, private.TryReusingBlobOptions{
Cache: ic.c.blobInfoCache,
CanSubstitute: canSubstitute,
EmptyLayer: emptyLayer,
LayerIndex: &layerIndex,
SrcRef: srcRef,
RequiredCompression: requiredCompression,
OriginalCompression: originalCompression,
Cache: ic.c.blobInfoCache,
CanSubstitute: canSubstitute,
EmptyLayer: emptyLayer,
LayerIndex: &layerIndex,
SrcRef: srcRef,
PossibleManifestFormats: append([]string{ic.manifestConversionPlan.preferredMIMEType}, ic.manifestConversionPlan.otherMIMETypeCandidates...),
RequiredCompression: requiredCompression,
OriginalCompression: srcInfo.CompressionAlgorithm,
TOCDigest: tocDigest,
})
if err != nil {
return types.BlobInfo{}, "", fmt.Errorf("trying to reuse blob %s at destination: %w", srcInfo.Digest, err)
@ -708,7 +739,11 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
if reused {
logrus.Debugf("Skipping blob %s (already present):", srcInfo.Digest)
func() { // A scope for defer
bar := ic.c.createProgressBar(pool, false, types.BlobInfo{Digest: reusedBlob.Digest, Size: 0}, "blob", "skipped: already exists")
label := "skipped: already exists"
if reusedBlob.MatchedByTOCDigest {
label = "skipped: already exists (found by TOC)"
}
bar := ic.c.createProgressBar(pool, false, types.BlobInfo{Digest: reusedBlob.Digest, Size: 0}, "blob", label)
defer bar.Abort(false)
bar.mark100PercentComplete()
}()
@ -741,7 +776,10 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
wrapped: ic.c.rawSource,
bar: bar,
}
uploadedBlob, err := ic.c.dest.PutBlobPartial(ctx, &proxy, srcInfo, ic.c.blobInfoCache)
uploadedBlob, err := ic.c.dest.PutBlobPartial(ctx, &proxy, srcInfo, private.PutBlobPartialOptions{
Cache: ic.c.blobInfoCache,
LayerIndex: layerIndex,
})
if err == nil {
if srcInfo.Size != -1 {
refill := srcInfo.Size - bar.Current()

View file

@ -190,7 +190,7 @@ func (d *dirImageDestination) PutBlobWithOptions(ctx context.Context, stream io.
// If the blob has been successfully reused, returns (true, info, nil).
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
func (d *dirImageDestination) TryReusingBlobWithOptions(ctx context.Context, info types.BlobInfo, options private.TryReusingBlobOptions) (bool, private.ReusedBlob, error) {
if !impl.OriginalBlobMatchesRequiredCompression(options) {
if !impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
return false, private.ReusedBlob{}, nil
}
if info.Digest == "" {

View file

@ -978,13 +978,10 @@ func (c *dockerClient) fetchManifest(ctx context.Context, ref dockerReference, t
// This function can return nil reader when no url is supported by this function. In this case, the caller
// should fallback to fetch the non-external blob (i.e. pull from the registry).
func (c *dockerClient) getExternalBlob(ctx context.Context, urls []string) (io.ReadCloser, int64, error) {
var (
resp *http.Response
err error
)
if len(urls) == 0 {
return nil, 0, errors.New("internal error: getExternalBlob called with no URLs")
}
var remoteErrors []error
for _, u := range urls {
blobURL, err := url.Parse(u)
if err != nil || (blobURL.Scheme != "http" && blobURL.Scheme != "https") {
@ -993,24 +990,28 @@ func (c *dockerClient) getExternalBlob(ctx context.Context, urls []string) (io.R
// NOTE: we must not authenticate on additional URLs as those
// can be abused to leak credentials or tokens. Please
// refer to CVE-2020-15157 for more information.
resp, err = c.makeRequestToResolvedURL(ctx, http.MethodGet, blobURL, nil, nil, -1, noAuth, nil)
if err == nil {
if resp.StatusCode != http.StatusOK {
err = fmt.Errorf("error fetching external blob from %q: %d (%s)", u, resp.StatusCode, http.StatusText(resp.StatusCode))
logrus.Debug(err)
resp.Body.Close()
continue
}
break
resp, err := c.makeRequestToResolvedURL(ctx, http.MethodGet, blobURL, nil, nil, -1, noAuth, nil)
if err != nil {
remoteErrors = append(remoteErrors, err)
continue
}
if resp.StatusCode != http.StatusOK {
err := fmt.Errorf("error fetching external blob from %q: %d (%s)", u, resp.StatusCode, http.StatusText(resp.StatusCode))
remoteErrors = append(remoteErrors, err)
logrus.Debug(err)
resp.Body.Close()
continue
}
return resp.Body, getBlobSize(resp), nil
}
if resp == nil && err == nil {
if remoteErrors == nil {
return nil, 0, nil // fallback to non-external blob
}
if err != nil {
return nil, 0, err
err := fmt.Errorf("failed fetching external blob from all urls: %w", remoteErrors[0])
for _, e := range remoteErrors[1:] {
err = fmt.Errorf("%s, %w", err, e)
}
return resp.Body, getBlobSize(resp), nil
return nil, 0, err
}
func getBlobSize(resp *http.Response) int64 {

View file

@ -27,6 +27,7 @@ import (
"github.com/containers/image/v5/internal/uploadreader"
"github.com/containers/image/v5/manifest"
"github.com/containers/image/v5/pkg/blobinfocache/none"
compressiontypes "github.com/containers/image/v5/pkg/compression/types"
"github.com/containers/image/v5/types"
"github.com/docker/distribution/registry/api/errcode"
v2 "github.com/docker/distribution/registry/api/v2"
@ -311,6 +312,13 @@ func (d *dockerImageDestination) tryReusingExactBlob(ctx context.Context, info t
return false, private.ReusedBlob{}, nil
}
func optionalCompressionName(algo *compressiontypes.Algorithm) string {
if algo != nil {
return algo.Name()
}
return "nil"
}
// TryReusingBlobWithOptions checks whether the transport already contains, or can efficiently reuse, a blob, and if so, applies it to the current destination
// (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree).
// info.Digest must not be empty.
@ -321,7 +329,7 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
return false, private.ReusedBlob{}, errors.New("Can not check for a blob with unknown digest")
}
if impl.OriginalBlobMatchesRequiredCompression(options) {
if impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
// First, check whether the blob happens to already exist at the destination.
haveBlob, reusedInfo, err := d.tryReusingExactBlob(ctx, info, options.Cache)
if err != nil {
@ -331,11 +339,8 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
return true, reusedInfo, nil
}
} else {
requiredCompression := "nil"
if options.OriginalCompression != nil {
requiredCompression = options.OriginalCompression.Name()
}
logrus.Debugf("Ignoring exact blob match case due to compression mismatch ( %s vs %s )", options.RequiredCompression.Name(), requiredCompression)
logrus.Debugf("Ignoring exact blob match, compression %s does not match required %s or MIME types %#v",
optionalCompressionName(options.OriginalCompression), optionalCompressionName(options.RequiredCompression), options.PossibleManifestFormats)
}
// Then try reusing blobs from other locations.
@ -355,15 +360,13 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
continue
}
}
if !impl.BlobMatchesRequiredCompression(options, compressionAlgorithm) {
requiredCompression := "nil"
if compressionAlgorithm != nil {
requiredCompression = compressionAlgorithm.Name()
}
if !impl.CandidateMatchesTryReusingBlobOptions(options, compressionAlgorithm) {
if !candidate.UnknownLocation {
logrus.Debugf("Ignoring candidate blob %s as reuse candidate due to compression mismatch ( %s vs %s ) in %s", candidate.Digest.String(), options.RequiredCompression.Name(), requiredCompression, candidateRepo.Name())
logrus.Debugf("Ignoring candidate blob %s in %s, compression %s does not match required %s or MIME types %#v", candidate.Digest.String(), candidateRepo.Name(),
optionalCompressionName(compressionAlgorithm), optionalCompressionName(options.RequiredCompression), options.PossibleManifestFormats)
} else {
logrus.Debugf("Ignoring candidate blob %s as reuse candidate due to compression mismatch ( %s vs %s ) with no location match, checking current repo", candidate.Digest.String(), options.RequiredCompression.Name(), requiredCompression)
logrus.Debugf("Ignoring candidate blob %s with no known location, compression %s does not match required %s or MIME types %#v", candidate.Digest.String(),
optionalCompressionName(compressionAlgorithm), optionalCompressionName(options.RequiredCompression), options.PossibleManifestFormats)
}
continue
}

View file

@ -129,7 +129,7 @@ func (d *Destination) PutBlobWithOptions(ctx context.Context, stream io.Reader,
// If the blob has been successfully reused, returns (true, info, nil).
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
func (d *Destination) TryReusingBlobWithOptions(ctx context.Context, info types.BlobInfo, options private.TryReusingBlobOptions) (bool, private.ReusedBlob, error) {
if !impl.OriginalBlobMatchesRequiredCompression(options) {
if !impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
return false, private.ReusedBlob{}, nil
}
if err := d.archive.lock(); err != nil {

View file

@ -36,7 +36,7 @@ type BlobInfoCache2 interface {
// that could possibly be reused within the specified (transport scope) (if they still
// exist, which is not guaranteed).
//
// If !canSubstitute, the returned cadidates will match the submitted digest exactly; if
// If !canSubstitute, the returned candidates will match the submitted digest exactly; if
// canSubstitute, data from previous RecordDigestUncompressedPair calls is used to also look
// up variants of the blob which have the same uncompressed digest.
//

View file

@ -1,25 +1,42 @@
package impl
import (
"github.com/containers/image/v5/internal/manifest"
"github.com/containers/image/v5/internal/private"
compression "github.com/containers/image/v5/pkg/compression/types"
"golang.org/x/exp/slices"
)
// BlobMatchesRequiredCompression validates if compression is required by the caller while selecting a blob, if it is required
// CandidateMatchesTryReusingBlobOptions validates if compression is required by the caller while selecting a blob, if it is required
// then function performs a match against the compression requested by the caller and compression of existing blob
// (which can be nil to represent uncompressed or unknown)
func BlobMatchesRequiredCompression(options private.TryReusingBlobOptions, candidateCompression *compression.Algorithm) bool {
if options.RequiredCompression == nil {
return true // no requirement imposed
func CandidateMatchesTryReusingBlobOptions(options private.TryReusingBlobOptions, candidateCompression *compression.Algorithm) bool {
if options.RequiredCompression != nil {
if options.RequiredCompression.Name() == compression.ZstdChunkedAlgorithmName {
// HACK: Never match when the caller asks for zstd:chunked, because we dont record the annotations required to use the chunked blobs.
// The caller must re-compress to build those annotations.
return false
}
if candidateCompression == nil ||
(options.RequiredCompression.Name() != candidateCompression.Name() && options.RequiredCompression.Name() != candidateCompression.BaseVariantName()) {
return false
}
}
if options.RequiredCompression.Name() == compression.ZstdChunkedAlgorithmName {
// HACK: Never match when the caller asks for zstd:chunked, because we dont record the annotations required to use the chunked blobs.
// The caller must re-compress to build those annotations.
return false
// For candidateCompression == nil, we cant tell the difference between “uncompressed” and “unknown”;
// and “uncompressed” is acceptable in all known formats (well, it seems to work in practice for schema1),
// so dont impose any restrictions if candidateCompression == nil
if options.PossibleManifestFormats != nil && candidateCompression != nil {
if !slices.ContainsFunc(options.PossibleManifestFormats, func(mt string) bool {
return manifest.MIMETypeSupportsCompressionAlgorithm(mt, *candidateCompression)
}) {
return false
}
}
return candidateCompression != nil && (options.RequiredCompression.Name() == candidateCompression.Name())
return true
}
func OriginalBlobMatchesRequiredCompression(opts private.TryReusingBlobOptions) bool {
return BlobMatchesRequiredCompression(opts, opts.OriginalCompression)
func OriginalCandidateMatchesTryReusingBlobOptions(opts private.TryReusingBlobOptions) bool {
return CandidateMatchesTryReusingBlobOptions(opts, opts.OriginalCompression)
}

View file

@ -4,7 +4,6 @@ import (
"context"
"fmt"
"github.com/containers/image/v5/internal/blobinfocache"
"github.com/containers/image/v5/internal/private"
"github.com/containers/image/v5/types"
)
@ -39,7 +38,7 @@ func (stub NoPutBlobPartialInitialize) SupportsPutBlobPartial() bool {
// It is available only if SupportsPutBlobPartial().
// Even if SupportsPutBlobPartial() returns true, the call can fail, in which case the caller
// should fall back to PutBlobWithOptions.
func (stub NoPutBlobPartialInitialize) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, cache blobinfocache.BlobInfoCache2) (private.UploadedBlob, error) {
func (stub NoPutBlobPartialInitialize) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, options private.PutBlobPartialOptions) (private.UploadedBlob, error) {
return private.UploadedBlob{}, fmt.Errorf("internal error: PutBlobPartial is not supported by the %q transport", stub.transportName)
}

View file

@ -28,7 +28,7 @@ type wrapped struct {
//
// NOTE: The returned API MUST NOT be a public interface (it can be either just a struct
// with public methods, or perhaps a private interface), so that we can add methods
// without breaking any external implementors of a public interface.
// without breaking any external implementers of a public interface.
func FromPublic(dest types.ImageDestination) private.ImageDestination {
if dest2, ok := dest.(private.ImageDestination); ok {
return dest2

View file

@ -27,7 +27,7 @@ type wrapped struct {
//
// NOTE: The returned API MUST NOT be a public interface (it can be either just a struct
// with public methods, or perhaps a private interface), so that we can add methods
// without breaking any external implementors of a public interface.
// without breaking any external implementers of a public interface.
func FromPublic(src types.ImageSource) private.ImageSource {
if src2, ok := src.(private.ImageSource); ok {
return src2

View file

@ -73,6 +73,7 @@ type ListUpdate struct {
Platform *imgspecv1.Platform
Annotations map[string]string
CompressionAlgorithmNames []string
ArtifactType string
}
}
@ -101,6 +102,7 @@ type ListEdit struct {
AddDigest digest.Digest
AddSize int64
AddMediaType string
AddArtifactType string
AddPlatform *imgspecv1.Platform
AddAnnotations map[string]string
AddCompressionAlgorithms []compression.Algorithm

View file

@ -3,6 +3,7 @@ package manifest
import (
"encoding/json"
compressiontypes "github.com/containers/image/v5/pkg/compression/types"
"github.com/containers/libtrust"
digest "github.com/opencontainers/go-digest"
imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
@ -14,7 +15,7 @@ import (
const (
// DockerV2Schema1MediaType MIME type represents Docker manifest schema 1
DockerV2Schema1MediaType = "application/vnd.docker.distribution.manifest.v1+json"
// DockerV2Schema1MediaType MIME type represents Docker manifest schema 1 with a JWS signature
// DockerV2Schema1SignedMediaType MIME type represents Docker manifest schema 1 with a JWS signature
DockerV2Schema1SignedMediaType = "application/vnd.docker.distribution.manifest.v1+prettyjws"
// DockerV2Schema2MediaType MIME type represents Docker manifest schema 2
DockerV2Schema2MediaType = "application/vnd.docker.distribution.manifest.v2+json"
@ -165,3 +166,29 @@ func NormalizedMIMEType(input string) string {
return DockerV2Schema1SignedMediaType
}
}
// CompressionAlgorithmIsUniversallySupported returns true if MIMETypeSupportsCompressionAlgorithm(mimeType, algo) returns true for all mimeType values.
func CompressionAlgorithmIsUniversallySupported(algo compressiontypes.Algorithm) bool {
// Compare the discussion about BaseVariantName in MIMETypeSupportsCompressionAlgorithm().
switch algo.Name() {
case compressiontypes.GzipAlgorithmName:
return true
default:
return false
}
}
// MIMETypeSupportsCompressionAlgorithm returns true if mimeType can represent algo.
func MIMETypeSupportsCompressionAlgorithm(mimeType string, algo compressiontypes.Algorithm) bool {
if CompressionAlgorithmIsUniversallySupported(algo) {
return true
}
// This does not use BaseVariantName: Plausibly a manifest format might support zstd but not have annotation fields.
// The logic might have to be more complex (and more ad-hoc) if more manifest formats, with more capabilities, emerge.
switch algo.Name() {
case compressiontypes.ZstdAlgorithmName, compressiontypes.ZstdChunkedAlgorithmName:
return mimeType == imgspecv1.MediaTypeImageManifest
default: // Includes Bzip2AlgorithmName and XzAlgorithmName, which are defined names but are not supported anywhere
return false
}
}

View file

@ -61,6 +61,7 @@ func (index *OCI1IndexPublic) Instance(instanceDigest digest.Digest) (ListUpdate
ret.ReadOnly.Platform = manifest.Platform
ret.ReadOnly.Annotations = manifest.Annotations
ret.ReadOnly.CompressionAlgorithmNames = annotationsToCompressionAlgorithmNames(manifest.Annotations)
ret.ReadOnly.ArtifactType = manifest.ArtifactType
return ret, nil
}
}
@ -102,7 +103,7 @@ func addCompressionAnnotations(compressionAlgorithms []compression.Algorithm, an
*annotationsMap = map[string]string{}
}
for _, algo := range compressionAlgorithms {
switch algo.Name() {
switch algo.BaseVariantName() {
case compression.ZstdAlgorithmName:
(*annotationsMap)[OCI1InstanceAnnotationCompressionZSTD] = OCI1InstanceAnnotationCompressionZSTDValue
default:
@ -157,11 +158,13 @@ func (index *OCI1IndexPublic) editInstances(editInstances []ListEdit) error {
}
addCompressionAnnotations(editInstance.AddCompressionAlgorithms, &annotations)
addedEntries = append(addedEntries, imgspecv1.Descriptor{
MediaType: editInstance.AddMediaType,
Size: editInstance.AddSize,
Digest: editInstance.AddDigest,
Platform: editInstance.AddPlatform,
Annotations: annotations})
MediaType: editInstance.AddMediaType,
ArtifactType: editInstance.AddArtifactType,
Size: editInstance.AddSize,
Digest: editInstance.AddDigest,
Platform: editInstance.AddPlatform,
Annotations: annotations,
})
default:
return fmt.Errorf("internal error: invalid operation: %d", editInstance.ListOperation)
}
@ -299,12 +302,13 @@ func OCI1IndexPublicFromComponents(components []imgspecv1.Descriptor, annotation
platform = &platformCopy
}
m := imgspecv1.Descriptor{
MediaType: component.MediaType,
Size: component.Size,
Digest: component.Digest,
URLs: slices.Clone(component.URLs),
Annotations: maps.Clone(component.Annotations),
Platform: platform,
MediaType: component.MediaType,
ArtifactType: component.ArtifactType,
Size: component.Size,
Digest: component.Digest,
URLs: slices.Clone(component.URLs),
Annotations: maps.Clone(component.Annotations),
Platform: platform,
}
index.Manifests[i] = m
}

View file

@ -25,6 +25,7 @@ import (
"github.com/containers/image/v5/types"
imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/sirupsen/logrus"
"golang.org/x/exp/slices"
)
@ -82,15 +83,40 @@ func getCPUVariantWindows(arch string) string {
func getCPUVariantArm() string {
variant, err := getCPUInfo("Cpu architecture")
if err != nil {
logrus.Errorf("Couldn't get cpu architecture: %v", err)
return ""
}
// TODO handle RPi Zero mismatch (https://github.com/moby/moby/pull/36121#issuecomment-398328286)
switch strings.ToLower(variant) {
case "8", "aarch64":
variant = "v8"
case "7", "7m", "?(12)", "?(13)", "?(14)", "?(15)", "?(16)", "?(17)":
case "7m", "?(12)", "?(13)", "?(14)", "?(15)", "?(16)", "?(17)":
variant = "v7"
case "7":
// handle RPi Zero variant mismatch due to wrong variant from kernel
// https://github.com/containerd/containerd/pull/4530
// https://www.raspberrypi.org/forums/viewtopic.php?t=12614
// https://github.com/moby/moby/pull/36121#issuecomment-398328286
model, err := getCPUInfo("model name")
if err != nil {
logrus.Errorf("Couldn't get cpu model name, it may be the corner case where variant is 6: %v", err)
return ""
}
// model name is NOT a value provided by the CPU; it is another outcome of Linux CPU detection,
// https://github.com/torvalds/linux/blob/190bf7b14b0cf3df19c059061be032bd8994a597/arch/arm/mm/proc-v6.S#L178C35-L178C35
// (matching happens based on value + mask at https://github.com/torvalds/linux/blob/190bf7b14b0cf3df19c059061be032bd8994a597/arch/arm/mm/proc-v6.S#L273-L274 )
// ARM CPU ID starts with a “main” ID register https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/System-Control-Registers-in-a-VMSA-implementation/VMSA-System-control-registers-descriptions--in-register-order/MIDR--Main-ID-Register--VMSA?lang=en ,
// but the ARMv6/ARMv7 differences are not a single dimension, https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/The-CPUID-Identification-Scheme?lang=en .
// The Linux "cpu architecture" is determined by a “memory model” feature.
//
// So, the "armv6-compatible" check basically checks for a "v6 or v7 CPU, but not one found listed as a known v7 one in the .proc.info.init tables of
// https://github.com/torvalds/linux/blob/190bf7b14b0cf3df19c059061be032bd8994a597/arch/arm/mm/proc-v7.S .
if strings.HasPrefix(strings.ToLower(model), "armv6-compatible") {
logrus.Debugf("Detected corner case, setting cpu variant to v6")
variant = "v6"
} else {
variant = "v7"
}
case "6", "6tej":
variant = "v6"
case "5", "5t", "5te", "5tej":

View file

@ -55,7 +55,7 @@ type ImageDestinationInternalOnly interface {
// It is available only if SupportsPutBlobPartial().
// Even if SupportsPutBlobPartial() returns true, the call can fail, in which case the caller
// should fall back to PutBlobWithOptions.
PutBlobPartial(ctx context.Context, chunkAccessor BlobChunkAccessor, srcInfo types.BlobInfo, cache blobinfocache.BlobInfoCache2) (UploadedBlob, error)
PutBlobPartial(ctx context.Context, chunkAccessor BlobChunkAccessor, srcInfo types.BlobInfo, options PutBlobPartialOptions) (UploadedBlob, error)
// TryReusingBlobWithOptions checks whether the transport already contains, or can efficiently reuse, a blob, and if so, applies it to the current destination
// (e.g. if the blob is a filesystem layer, this signifies that the changes it describes need to be applied again when composing a filesystem tree).
@ -100,6 +100,12 @@ type PutBlobOptions struct {
LayerIndex *int // If the blob is a layer, a zero-based index of the layer within the image; nil otherwise.
}
// PutBlobPartialOptions are used in PutBlobPartial.
type PutBlobPartialOptions struct {
Cache blobinfocache.BlobInfoCache2 // Cache to use and/or update.
LayerIndex int // A zero-based index of the layer within the image (PutBlobPartial is only called with layer-like blobs, not configs)
}
// TryReusingBlobOptions are used in TryReusingBlobWithOptions.
type TryReusingBlobOptions struct {
Cache blobinfocache.BlobInfoCache2 // Cache to use and/or update.
@ -112,11 +118,13 @@ type TryReusingBlobOptions struct {
// Transports, OTOH, MUST support these fields being zero-valued for types.ImageDestination callers
// if they use internal/imagedestination/impl.Compat;
// in that case, they will all be consistently zero-valued.
RequiredCompression *compression.Algorithm // If set, reuse blobs with a matching algorithm as per implementations in internal/imagedestination/impl.helpers.go
OriginalCompression *compression.Algorithm // Must be set if RequiredCompression is set; can be set to nil to indicate “uncompressed” or “unknown”.
EmptyLayer bool // True if the blob is an "empty"/"throwaway" layer, and may not necessarily be physically represented.
LayerIndex *int // If the blob is a layer, a zero-based index of the layer within the image; nil otherwise.
SrcRef reference.Named // A reference to the source image that contains the input blob.
EmptyLayer bool // True if the blob is an "empty"/"throwaway" layer, and may not necessarily be physically represented.
LayerIndex *int // If the blob is a layer, a zero-based index of the layer within the image; nil otherwise.
SrcRef reference.Named // A reference to the source image that contains the input blob.
PossibleManifestFormats []string // A set of possible manifest formats; at least one should support the reused layer blob.
RequiredCompression *compression.Algorithm // If set, reuse blobs with a matching algorithm as per implementations in internal/imagedestination/impl.helpers.go
OriginalCompression *compression.Algorithm // May be nil to indicate “uncompressed” or “unknown”.
TOCDigest digest.Digest // If specified, the blob can be looked up in the destination also by its TOC digest.
}
// ReusedBlob is information about a blob reused in a destination.
@ -128,6 +136,8 @@ type ReusedBlob struct {
// a differently-compressed blob.
CompressionOperation types.LayerCompression // Compress/Decompress, matching the reused blob; PreserveOriginal if N/A
CompressionAlgorithm *compression.Algorithm // Algorithm if compressed, nil if decompressed or N/A
MatchedByTOCDigest bool // Whether the layer was reused/matched by TOC digest. Used only for UI purposes.
}
// ImageSourceChunk is a portion of a blob.

View file

@ -55,7 +55,7 @@ func compressionVariantMIMEType(variantTable []compressionMIMETypeSet, mimeType
if variants != nil {
name := mtsUncompressed
if algorithm != nil {
name = algorithm.InternalUnstableUndocumentedMIMEQuestionMark()
name = algorithm.BaseVariantName()
}
if res, ok := variants[name]; ok {
if res != mtsUnsupportedMIMEType {

View file

@ -10,6 +10,7 @@ import (
"github.com/containers/image/v5/docker/reference"
"github.com/containers/image/v5/internal/manifest"
"github.com/containers/image/v5/internal/set"
compressiontypes "github.com/containers/image/v5/pkg/compression/types"
"github.com/containers/image/v5/types"
"github.com/containers/storage/pkg/regexp"
"github.com/docker/docker/api/types/versions"
@ -142,6 +143,15 @@ func (m *Schema1) LayerInfos() []LayerInfo {
return layers
}
const fakeSchema1MIMEType = DockerV2Schema2LayerMediaType // Used only in schema1CompressionMIMETypeSets
var schema1CompressionMIMETypeSets = []compressionMIMETypeSet{
{
mtsUncompressed: fakeSchema1MIMEType,
compressiontypes.GzipAlgorithmName: fakeSchema1MIMEType,
compressiontypes.ZstdAlgorithmName: mtsUnsupportedMIMEType,
},
}
// UpdateLayerInfos replaces the original layers with the specified BlobInfos (size+digest+urls), in order (the root layer first, and then successive layered layers)
func (m *Schema1) UpdateLayerInfos(layerInfos []types.BlobInfo) error {
// Our LayerInfos includes empty layers (where m.ExtractedV1Compatibility[].ThrowAway), so expect them to be included here as well.
@ -150,6 +160,11 @@ func (m *Schema1) UpdateLayerInfos(layerInfos []types.BlobInfo) error {
}
m.FSLayers = make([]Schema1FSLayers, len(layerInfos))
for i, info := range layerInfos {
// There are no MIME types in schema1, but we do a “conversion” here to reject unsupported compression algorithms,
// in a way that is consistent with the other schema implementations.
if _, err := updatedMIMEType(schema1CompressionMIMETypeSets, fakeSchema1MIMEType, info); err != nil {
return fmt.Errorf("preparing updated manifest, layer %q: %w", info.Digest, err)
}
// (docker push) sets up m.ExtractedV1Compatibility[].{Id,Parent} based on values of info.Digest,
// but (docker pull) ignores them in favor of computing DiffIDs from uncompressed data, except verifying the child->parent links and uniqueness.
// So, we don't bother recomputing the IDs in m.History.V1Compatibility.

View file

@ -16,7 +16,7 @@ import (
const (
// DockerV2Schema1MediaType MIME type represents Docker manifest schema 1
DockerV2Schema1MediaType = manifest.DockerV2Schema1MediaType
// DockerV2Schema1MediaType MIME type represents Docker manifest schema 1 with a JWS signature
// DockerV2Schema1SignedMediaType MIME type represents Docker manifest schema 1 with a JWS signature
DockerV2Schema1SignedMediaType = manifest.DockerV2Schema1SignedMediaType
// DockerV2Schema2MediaType MIME type represents Docker manifest schema 2
DockerV2Schema2MediaType = manifest.DockerV2Schema2MediaType

View file

@ -235,7 +235,7 @@ func (m *OCI1) Inspect(configGetter func(types.BlobInfo) ([]byte, error)) (*type
}
// ImageID computes an ID which can uniquely identify this image by its contents.
func (m *OCI1) ImageID([]digest.Digest) (string, error) {
func (m *OCI1) ImageID(diffIDs []digest.Digest) (string, error) {
// The way m.Config.Digest “uniquely identifies” an image is
// by containing RootFS.DiffIDs, which identify the layers of the image.
// For non-image artifacts, the we cant expect the config to change

View file

@ -6,13 +6,13 @@ import (
"io"
"os"
"github.com/containers/image/v5/internal/blobinfocache"
"github.com/containers/image/v5/internal/imagedestination"
"github.com/containers/image/v5/internal/imagedestination/impl"
"github.com/containers/image/v5/internal/private"
"github.com/containers/image/v5/internal/signature"
"github.com/containers/image/v5/types"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/idtools"
digest "github.com/opencontainers/go-digest"
"github.com/sirupsen/logrus"
)
@ -119,8 +119,8 @@ func (d *ociArchiveImageDestination) PutBlobWithOptions(ctx context.Context, str
// It is available only if SupportsPutBlobPartial().
// Even if SupportsPutBlobPartial() returns true, the call can fail, in which case the caller
// should fall back to PutBlobWithOptions.
func (d *ociArchiveImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, cache blobinfocache.BlobInfoCache2) (private.UploadedBlob, error) {
return d.unpackedDest.PutBlobPartial(ctx, chunkAccessor, srcInfo, cache)
func (d *ociArchiveImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, options private.PutBlobPartialOptions) (private.UploadedBlob, error) {
return d.unpackedDest.PutBlobPartial(ctx, chunkAccessor, srcInfo, options)
}
// TryReusingBlobWithOptions checks whether the transport already contains, or can efficiently reuse, a blob, and if so, applies it to the current destination
@ -169,10 +169,15 @@ func (d *ociArchiveImageDestination) Commit(ctx context.Context, unparsedTopleve
// tar converts the directory at src and saves it to dst
func tarDirectory(src, dst string) error {
// input is a stream of bytes from the archive of the directory at path
input, err := archive.Tar(src, archive.Uncompressed)
input, err := archive.TarWithOptions(src, &archive.TarOptions{
Compression: archive.Uncompressed,
// Dont include the data about the user account this code is running under.
ChownOpts: &idtools.IDPair{UID: 0, GID: 0},
})
if err != nil {
return fmt.Errorf("retrieving stream of bytes from %q: %w", src, err)
}
defer input.Close()
// creates the tar file
outFile, err := os.Create(dst)

View file

@ -173,7 +173,7 @@ func (d *ociImageDestination) PutBlobWithOptions(ctx context.Context, stream io.
// If the blob has been successfully reused, returns (true, info, nil).
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
func (d *ociImageDestination) TryReusingBlobWithOptions(ctx context.Context, info types.BlobInfo, options private.TryReusingBlobOptions) (bool, private.ReusedBlob, error) {
if !impl.OriginalBlobMatchesRequiredCompression(options) {
if !impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
return false, private.ReusedBlob{}, nil
}
if info.Digest == "" {

View file

@ -12,7 +12,6 @@ import (
"github.com/containers/image/v5/docker"
"github.com/containers/image/v5/docker/reference"
"github.com/containers/image/v5/internal/blobinfocache"
"github.com/containers/image/v5/internal/imagedestination"
"github.com/containers/image/v5/internal/imagedestination/impl"
"github.com/containers/image/v5/internal/imagedestination/stubs"
@ -128,8 +127,8 @@ func (d *openshiftImageDestination) PutBlobWithOptions(ctx context.Context, stre
// It is available only if SupportsPutBlobPartial().
// Even if SupportsPutBlobPartial() returns true, the call can fail, in which case the caller
// should fall back to PutBlobWithOptions.
func (d *openshiftImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, cache blobinfocache.BlobInfoCache2) (private.UploadedBlob, error) {
return d.docker.PutBlobPartial(ctx, chunkAccessor, srcInfo, cache)
func (d *openshiftImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, options private.PutBlobPartialOptions) (private.UploadedBlob, error) {
return d.docker.PutBlobPartial(ctx, chunkAccessor, srcInfo, options)
}
// TryReusingBlobWithOptions checks whether the transport already contains, or can efficiently reuse, a blob, and if so, applies it to the current destination

View file

@ -335,7 +335,7 @@ func (d *ostreeImageDestination) importConfig(repo *otbuiltin.Repo, blob *blobTo
// reflected in the manifest that will be written.
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
func (d *ostreeImageDestination) TryReusingBlobWithOptions(ctx context.Context, info types.BlobInfo, options private.TryReusingBlobOptions) (bool, private.ReusedBlob, error) {
if !impl.OriginalBlobMatchesRequiredCompression(options) {
if !impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
return false, private.ReusedBlob{}, nil
}
if d.repo == nil {

View file

@ -91,11 +91,11 @@ func min(a, b int) int {
// destructivelyPrioritizeReplacementCandidatesWithMax is destructivelyPrioritizeReplacementCandidates with parameters for the
// number of entries to limit for known and unknown location separately, only to make testing simpler.
// TODO: following function is not destructive any more in the nature instead priortized result is actually copies of the original
// TODO: following function is not destructive any more in the nature instead prioritized result is actually copies of the original
// candidate set, so In future we might wanna re-name this public API and remove the destructive prefix.
func destructivelyPrioritizeReplacementCandidatesWithMax(cs []CandidateWithTime, primaryDigest, uncompressedDigest digest.Digest, totalLimit int, noLocationLimit int) []blobinfocache.BICReplacementCandidate2 {
// split unknown candidates and known candidates
// and limit them seperately.
// and limit them separately.
var knownLocationCandidates []CandidateWithTime
var unknownLocationCandidates []CandidateWithTime
// We don't need to use sort.Stable() because nanosecond timestamps are (presumably?) unique, so no two elements should

View file

@ -184,7 +184,7 @@ func (mem *cache) CandidateLocations(transport types.ImageTransport, scope types
// CandidateLocations2 returns a prioritized, limited, number of blobs and their locations (if known) that could possibly be reused
// within the specified (transport scope) (if they still exist, which is not guaranteed).
//
// If !canSubstitute, the returned cadidates will match the submitted digest exactly; if canSubstitute,
// If !canSubstitute, the returned candidates will match the submitted digest exactly; if canSubstitute,
// data from previous RecordDigestUncompressedPair calls is used to also look up variants of the blob which have the same
// uncompressed digest.
func (mem *cache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute bool) []blobinfocache.BICReplacementCandidate2 {

View file

@ -171,7 +171,7 @@ func transaction[T any](sqc *cache, fn func(tx *sql.Tx) (T, error)) (T, error) {
// dbTransaction calls fn within a read-write transaction in db.
func dbTransaction[T any](db *sql.DB, fn func(tx *sql.Tx) (T, error)) (T, error) {
// Ideally we should be able to distinguish between read-only and read-write transactions, see the _txlock=exclusive dicussion.
// Ideally we should be able to distinguish between read-only and read-write transactions, see the _txlock=exclusive discussion.
var zeroRes T // A zero value of T
@ -496,7 +496,7 @@ func (sqc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
// that could possibly be reused within the specified (transport scope) (if they still
// exist, which is not guaranteed).
//
// If !canSubstitute, the returned cadidates will match the submitted digest exactly; if
// If !canSubstitute, the returned candidates will match the submitted digest exactly; if
// canSubstitute, data from previous RecordDigestUncompressedPair calls is used to also look
// up variants of the blob which have the same uncompressed digest.
//

View file

@ -19,19 +19,19 @@ type Algorithm = types.Algorithm
var (
// Gzip compression.
Gzip = internal.NewAlgorithm(types.GzipAlgorithmName, types.GzipAlgorithmName,
Gzip = internal.NewAlgorithm(types.GzipAlgorithmName, "",
[]byte{0x1F, 0x8B, 0x08}, GzipDecompressor, gzipCompressor)
// Bzip2 compression.
Bzip2 = internal.NewAlgorithm(types.Bzip2AlgorithmName, types.Bzip2AlgorithmName,
Bzip2 = internal.NewAlgorithm(types.Bzip2AlgorithmName, "",
[]byte{0x42, 0x5A, 0x68}, Bzip2Decompressor, bzip2Compressor)
// Xz compression.
Xz = internal.NewAlgorithm(types.XzAlgorithmName, types.XzAlgorithmName,
Xz = internal.NewAlgorithm(types.XzAlgorithmName, "",
[]byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}, XzDecompressor, xzCompressor)
// Zstd compression.
Zstd = internal.NewAlgorithm(types.ZstdAlgorithmName, types.ZstdAlgorithmName,
Zstd = internal.NewAlgorithm(types.ZstdAlgorithmName, "",
[]byte{0x28, 0xb5, 0x2f, 0xfd}, ZstdDecompressor, zstdCompressor)
// ZstdChunked is a Zstd compression with chunk metadta which allows random access to individual files.
ZstdChunked = internal.NewAlgorithm(types.ZstdChunkedAlgorithmName, types.ZstdAlgorithmName, /* Note: InternalUnstableUndocumentedMIMEQuestionMark is not ZstdChunkedAlgorithmName */
// ZstdChunked is a Zstd compression with chunk metadata which allows random access to individual files.
ZstdChunked = internal.NewAlgorithm(types.ZstdChunkedAlgorithmName, types.ZstdAlgorithmName,
nil, ZstdDecompressor, compressor.ZstdCompressor)
compressionAlgorithms = map[string]Algorithm{

View file

@ -12,23 +12,28 @@ type DecompressorFunc func(io.Reader) (io.ReadCloser, error)
// Algorithm is a compression algorithm that can be used for CompressStream.
type Algorithm struct {
name string
mime string
prefix []byte // Initial bytes of a stream compressed using this algorithm, or empty to disable detection.
decompressor DecompressorFunc
compressor CompressorFunc
name string
baseVariantName string
prefix []byte // Initial bytes of a stream compressed using this algorithm, or empty to disable detection.
decompressor DecompressorFunc
compressor CompressorFunc
}
// NewAlgorithm creates an Algorithm instance.
// nontrivialBaseVariantName is typically "".
// This function exists so that Algorithm instances can only be created by code that
// is allowed to import this internal subpackage.
func NewAlgorithm(name, mime string, prefix []byte, decompressor DecompressorFunc, compressor CompressorFunc) Algorithm {
func NewAlgorithm(name, nontrivialBaseVariantName string, prefix []byte, decompressor DecompressorFunc, compressor CompressorFunc) Algorithm {
baseVariantName := name
if nontrivialBaseVariantName != "" {
baseVariantName = nontrivialBaseVariantName
}
return Algorithm{
name: name,
mime: mime,
prefix: prefix,
decompressor: decompressor,
compressor: compressor,
name: name,
baseVariantName: baseVariantName,
prefix: prefix,
decompressor: decompressor,
compressor: compressor,
}
}
@ -37,10 +42,11 @@ func (c Algorithm) Name() string {
return c.name
}
// InternalUnstableUndocumentedMIMEQuestionMark ???
// DO NOT USE THIS anywhere outside of c/image until it is properly documented.
func (c Algorithm) InternalUnstableUndocumentedMIMEQuestionMark() string {
return c.mime
// BaseVariantName returns the name of the “base variant” of the compression algorithm.
// It is either equal to Name() of the same algorithm, or equal to Name() of some other Algorithm (the “base variant”).
// This supports a single level of “is-a” relationship between compression algorithms, e.g. where "zstd:chunked" data is valid "zstd" data.
func (c Algorithm) BaseVariantName() string {
return c.baseVariantName
}
// AlgorithmCompressor returns the compressor field of algo.

View file

@ -1,3 +1,6 @@
//go:build !containers_image_fulcio_stub
// +build !containers_image_fulcio_stub
package signature
import (

View file

@ -0,0 +1,28 @@
//go:build containers_image_fulcio_stub
// +build containers_image_fulcio_stub
package signature
import (
"crypto"
"crypto/ecdsa"
"crypto/x509"
"errors"
)
type fulcioTrustRoot struct {
caCertificates *x509.CertPool
oidcIssuer string
subjectEmail string
}
func (f *fulcioTrustRoot) validate() error {
return errors.New("fulcio disabled at compile-time")
}
func verifyRekorFulcio(rekorPublicKey *ecdsa.PublicKey, fulcioTrustRoot *fulcioTrustRoot, untrustedRekorSET []byte,
untrustedCertificateBytes []byte, untrustedIntermediateChainBytes []byte, untrustedBase64Signature string,
untrustedPayloadBytes []byte) (crypto.PublicKey, error) {
return nil, errors.New("fulcio disabled at compile-time")
}

View file

@ -1,3 +1,6 @@
//go:build !containers_image_rekor_stub
// +build !containers_image_rekor_stub
package internal
import (
@ -216,6 +219,10 @@ func VerifyRekorSET(publicKey *ecdsa.PublicKey, unverifiedRekorSET []byte, unver
if hashedRekordV001.Data.Hash.Algorithm == nil {
return time.Time{}, NewInvalidSignatureError(`Missing "data.hash.algorithm" field in hashedrekord`)
}
// FIXME: Rekor 1.3.5 has added SHA-386 and SHA-512 as recognized values.
// Eventually we should support them as well; doing that cleanly would require updqating to Rekor 1.3.5, which requires Go 1.21.
// Short-term, Cosign (as of 2024-02 and Cosign 2.2.3) only produces and accepts SHA-256, so right now thats not a compatibility
// issue.
if *hashedRekordV001.Data.Hash.Algorithm != models.HashedrekordV001SchemaDataHashAlgorithmSha256 {
return time.Time{}, NewInvalidSignatureError(fmt.Sprintf(`Unexpected "data.hash.algorithm" value %#v`, *hashedRekordV001.Data.Hash.Algorithm))
}

View file

@ -0,0 +1,15 @@
//go:build containers_image_rekor_stub
// +build containers_image_rekor_stub
package internal
import (
"crypto/ecdsa"
"time"
)
// VerifyRekorSET verifies that unverifiedRekorSET is correctly signed by publicKey and matches the rest of the data.
// Returns bundle upload time on success.
func VerifyRekorSET(publicKey *ecdsa.PublicKey, unverifiedRekorSET []byte, unverifiedKeyOrCertBytes []byte, unverifiedBase64Signature string, unverifiedPayloadBytes []byte) (time.Time, error) {
return time.Time{}, NewInvalidSignatureError("rekor disabled at compile-time")
}

View file

@ -16,7 +16,6 @@ import (
"sync/atomic"
"github.com/containers/image/v5/docker/reference"
"github.com/containers/image/v5/internal/blobinfocache"
"github.com/containers/image/v5/internal/imagedestination/impl"
"github.com/containers/image/v5/internal/imagedestination/stubs"
"github.com/containers/image/v5/internal/private"
@ -55,41 +54,61 @@ type storageImageDestination struct {
stubs.ImplementsPutBlobPartial
stubs.AlwaysSupportsSignatures
imageRef storageReference
directory string // Temporary directory where we store blobs until Commit() time
nextTempFileID atomic.Int32 // A counter that we use for computing filenames to assign to blobs
manifest []byte // Manifest contents, temporary
manifestDigest digest.Digest // Valid if len(manifest) != 0
signatures []byte // Signature contents, temporary
signatureses map[digest.Digest][]byte // Instance signature contents, temporary
SignatureSizes []int `json:"signature-sizes,omitempty"` // List of sizes of each signature slice
SignaturesSizes map[digest.Digest][]int `json:"signatures-sizes,omitempty"` // Sizes of each manifest's signature slice
imageRef storageReference
directory string // Temporary directory where we store blobs until Commit() time
nextTempFileID atomic.Int32 // A counter that we use for computing filenames to assign to blobs
manifest []byte // Manifest contents, temporary
manifestDigest digest.Digest // Valid if len(manifest) != 0
untrustedDiffIDValues []digest.Digest // From configs RootFS.DiffIDs, valid if not nil
signatures []byte // Signature contents, temporary
signatureses map[digest.Digest][]byte // Instance signature contents, temporary
metadata storageImageMetadata // Metadata contents being built
// A storage destination may be used concurrently. Accesses are
// serialized via a mutex. Please refer to the individual comments
// below for details.
lock sync.Mutex
// Mapping from layer (by index) to the associated ID in the storage.
// It's protected *implicitly* since `commitLayer()`, at any given
// time, can only be executed by *one* goroutine. Please refer to
// `queueOrCommit()` for further details on how the single-caller
// guarantee is implemented.
indexToStorageID map[int]*string
// All accesses to below data are protected by `lock` which is made
// *explicit* in the code.
blobDiffIDs map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs
fileSizes map[digest.Digest]int64 // Mapping from layer blobsums to their sizes
filenames map[digest.Digest]string // Mapping from layer blobsums to names of files we used to hold them
currentIndex int // The index of the layer to be committed (i.e., lower indices have already been committed)
indexToAddedLayerInfo map[int]addedLayerInfo // Mapping from layer (by index) to blob to add to the image
blobAdditionalLayer map[digest.Digest]storage.AdditionalLayer // Mapping from layer blobsums to their corresponding additional layer
diffOutputs map[digest.Digest]*graphdriver.DriverWithDifferOutput // Mapping from digest to differ output
indexToStorageID map[int]string
// A storage destination may be used concurrently, due to HasThreadSafePutBlob.
lock sync.Mutex // Protects lockProtected
lockProtected storageImageDestinationLockProtected
}
// storageImageDestinationLockProtected contains storageImageDestination data which might be
// accessed concurrently, due to HasThreadSafePutBlob.
// _During the concurrent TryReusingBlob/PutBlob/* calls_ (but not necessarily during the final Commit)
// uses must hold storageImageDestination.lock.
type storageImageDestinationLockProtected struct {
currentIndex int // The index of the layer to be committed (i.e., lower indices have already been committed)
indexToAddedLayerInfo map[int]addedLayerInfo // Mapping from layer (by index) to blob to add to the image
// In general, a layer is identified either by (compressed) digest, or by TOC digest.
// When creating a layer, the c/storage layer metadata and image IDs must _only_ be based on trusted values
// we have computed ourselves. (Layer reuse can then look up against such trusted values, but it might not
// recompute those values for incomding layers — the point of the reuse is that we dont need to consume the incoming layer.)
// Layer identification: For a layer, at least one of indexToTOCDigest and blobDiffIDs must be available before commitLayer is called.
// The presence of an indexToTOCDigest is what decides how the layer is identified, i.e. which fields must be trusted.
blobDiffIDs map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs
indexToTOCDigest map[int]digest.Digest // Mapping from layer index to a TOC Digest, IFF the layer was created/found/reused by TOC digest
// Layer data: Before commitLayer is called, either at least one of (diffOutputs, blobAdditionalLayer, filenames)
// should be available; or indexToTOCDigest/blobDiffIDs should be enough to locate an existing c/storage layer.
// They are looked up in the order they are mentioned above.
diffOutputs map[int]*graphdriver.DriverWithDifferOutput // Mapping from layer index to a partially-pulled layer intermediate data
blobAdditionalLayer map[digest.Digest]storage.AdditionalLayer // Mapping from layer blobsums to their corresponding additional layer
// Mapping from layer blobsums to names of files we used to hold them. If set, fileSizes and blobDiffIDs must also be set.
filenames map[digest.Digest]string
// Mapping from layer blobsums to their sizes. If set, filenames and blobDiffIDs must also be set.
fileSizes map[digest.Digest]int64
}
// addedLayerInfo records data about a layer to use in this image.
type addedLayerInfo struct {
digest digest.Digest
emptyLayer bool // The layer is an “empty”/“throwaway” one, and may or may not be physically represented in various transport / storage systems. false if the manifest type does not have the concept.
digest digest.Digest // Mandatory, the digest of the layer.
emptyLayer bool // The layer is an “empty”/“throwaway” one, and may or may not be physically represented in various transport / storage systems. false if the manifest type does not have the concept.
}
// newImageDestination sets us up to write a new image, caching blobs in a temporary directory until
@ -117,18 +136,23 @@ func newImageDestination(sys *types.SystemContext, imageRef storageReference) (*
HasThreadSafePutBlob: true,
}),
imageRef: imageRef,
directory: directory,
signatureses: make(map[digest.Digest][]byte),
blobDiffIDs: make(map[digest.Digest]digest.Digest),
blobAdditionalLayer: make(map[digest.Digest]storage.AdditionalLayer),
fileSizes: make(map[digest.Digest]int64),
filenames: make(map[digest.Digest]string),
SignatureSizes: []int{},
SignaturesSizes: make(map[digest.Digest][]int),
indexToStorageID: make(map[int]*string),
indexToAddedLayerInfo: make(map[int]addedLayerInfo),
diffOutputs: make(map[digest.Digest]*graphdriver.DriverWithDifferOutput),
imageRef: imageRef,
directory: directory,
signatureses: make(map[digest.Digest][]byte),
metadata: storageImageMetadata{
SignatureSizes: []int{},
SignaturesSizes: make(map[digest.Digest][]int),
},
indexToStorageID: make(map[int]string),
lockProtected: storageImageDestinationLockProtected{
indexToAddedLayerInfo: make(map[int]addedLayerInfo),
blobDiffIDs: make(map[digest.Digest]digest.Digest),
indexToTOCDigest: make(map[int]digest.Digest),
diffOutputs: make(map[int]*graphdriver.DriverWithDifferOutput),
blobAdditionalLayer: make(map[digest.Digest]storage.AdditionalLayer),
filenames: make(map[digest.Digest]string),
fileSizes: make(map[digest.Digest]int64),
},
}
dest.Compat = impl.AddCompat(dest)
return dest, nil
@ -142,12 +166,13 @@ func (s *storageImageDestination) Reference() types.ImageReference {
// Close cleans up the temporary directory and additional layer store handlers.
func (s *storageImageDestination) Close() error {
for _, al := range s.blobAdditionalLayer {
// This is outside of the scope of HasThreadSafePutBlob, so we dont need to hold s.lock.
for _, al := range s.lockProtected.blobAdditionalLayer {
al.Release()
}
for _, v := range s.diffOutputs {
for _, v := range s.lockProtected.diffOutputs {
if v.Target != "" {
_ = s.imageRef.transport.store.CleanupStagingDirectory(v.Target)
_ = s.imageRef.transport.store.CleanupStagedLayer(v)
}
}
return os.RemoveAll(s.directory)
@ -227,9 +252,9 @@ func (s *storageImageDestination) putBlobToPendingFile(stream io.Reader, blobinf
// Record information about the blob.
s.lock.Lock()
s.blobDiffIDs[blobDigest] = diffID.Digest()
s.fileSizes[blobDigest] = counter.Count
s.filenames[blobDigest] = filename
s.lockProtected.blobDiffIDs[blobDigest] = diffID.Digest()
s.lockProtected.fileSizes[blobDigest] = counter.Count
s.lockProtected.filenames[blobDigest] = filename
s.lock.Unlock()
// This is safe because we have just computed diffID, and blobDigest was either computed
// by us, or validated by the caller (usually copy.digestingReader).
@ -269,14 +294,14 @@ func (f *zstdFetcher) GetBlobAt(chunks []chunked.ImageSourceChunk) (chan io.Read
// It is available only if SupportsPutBlobPartial().
// Even if SupportsPutBlobPartial() returns true, the call can fail, in which case the caller
// should fall back to PutBlobWithOptions.
func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, cache blobinfocache.BlobInfoCache2) (private.UploadedBlob, error) {
func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, options private.PutBlobPartialOptions) (private.UploadedBlob, error) {
fetcher := zstdFetcher{
chunkAccessor: chunkAccessor,
ctx: ctx,
blobInfo: srcInfo,
}
differ, err := chunked.GetDiffer(ctx, s.imageRef.transport.store, srcInfo.Size, srcInfo.Annotations, &fetcher)
differ, err := chunked.GetDiffer(ctx, s.imageRef.transport.store, srcInfo.Digest, srcInfo.Size, srcInfo.Annotations, &fetcher)
if err != nil {
return private.UploadedBlob{}, err
}
@ -286,13 +311,25 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces
return private.UploadedBlob{}, err
}
if out.TOCDigest == "" && out.UncompressedDigest == "" {
return private.UploadedBlob{}, errors.New("internal error: ApplyDiffWithDiffer succeeded with neither TOCDigest nor UncompressedDigest set")
}
blobDigest := srcInfo.Digest
s.lock.Lock()
s.blobDiffIDs[blobDigest] = blobDigest
s.fileSizes[blobDigest] = 0
s.filenames[blobDigest] = ""
s.diffOutputs[blobDigest] = out
if out.UncompressedDigest != "" {
// The computation of UncompressedDigest means the whole layer has been consumed; while doing that, chunked.GetDiffer is
// responsible for ensuring blobDigest has been validated.
s.lockProtected.blobDiffIDs[blobDigest] = out.UncompressedDigest
} else {
// Dont identify layers by TOC if UncompressedDigest is available.
// - Using UncompressedDigest allows image reuse with non-partially-pulled layers
// - If UncompressedDigest has been computed, that means the layer was read completely, and the TOC has been created from scratch.
// That TOC is quite unlikely to match with any other TOC value.
s.lockProtected.indexToTOCDigest[options.LayerIndex] = out.TOCDigest
}
s.lockProtected.diffOutputs[options.LayerIndex] = out
s.lock.Unlock()
return private.UploadedBlob{
@ -307,7 +344,7 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces
// If the blob has been successfully reused, returns (true, info, nil).
// If the transport can not reuse the requested blob, TryReusingBlob returns (false, {}, nil); it returns a non-nil error only on an unexpected failure.
func (s *storageImageDestination) TryReusingBlobWithOptions(ctx context.Context, blobinfo types.BlobInfo, options private.TryReusingBlobOptions) (bool, private.ReusedBlob, error) {
if !impl.OriginalBlobMatchesRequiredCompression(options) {
if !impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
return false, private.ReusedBlob{}, nil
}
reused, info, err := s.tryReusingBlobAsPending(blobinfo.Digest, blobinfo.Size, &options)
@ -321,68 +358,79 @@ func (s *storageImageDestination) TryReusingBlobWithOptions(ctx context.Context,
})
}
// tryReusingBlobAsPending implements TryReusingBlobWithOptions for (digest, size or -1), filling s.blobDiffIDs and other metadata.
// tryReusingBlobAsPending implements TryReusingBlobWithOptions for (blobDigest, size or -1), filling s.blobDiffIDs and other metadata.
// The caller must arrange the blob to be eventually committed using s.commitLayer().
func (s *storageImageDestination) tryReusingBlobAsPending(digest digest.Digest, size int64, options *private.TryReusingBlobOptions) (bool, private.ReusedBlob, error) {
func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Digest, size int64, options *private.TryReusingBlobOptions) (bool, private.ReusedBlob, error) {
// lock the entire method as it executes fairly quickly
s.lock.Lock()
defer s.lock.Unlock()
if options.SrcRef != nil {
// Check if we have the layer in the underlying additional layer store.
aLayer, err := s.imageRef.transport.store.LookupAdditionalLayer(digest, options.SrcRef.String())
aLayer, err := s.imageRef.transport.store.LookupAdditionalLayer(blobDigest, options.SrcRef.String())
if err != nil && !errors.Is(err, storage.ErrLayerUnknown) {
return false, private.ReusedBlob{}, fmt.Errorf(`looking for compressed layers with digest %q and labels: %w`, digest, err)
return false, private.ReusedBlob{}, fmt.Errorf(`looking for compressed layers with digest %q and labels: %w`, blobDigest, err)
} else if err == nil {
// Record the uncompressed value so that we can use it to calculate layer IDs.
s.blobDiffIDs[digest] = aLayer.UncompressedDigest()
s.blobAdditionalLayer[digest] = aLayer
s.lockProtected.blobDiffIDs[blobDigest] = aLayer.UncompressedDigest()
s.lockProtected.blobAdditionalLayer[blobDigest] = aLayer
return true, private.ReusedBlob{
Digest: digest,
Digest: blobDigest,
Size: aLayer.CompressedSize(),
}, nil
}
}
if digest == "" {
if blobDigest == "" {
return false, private.ReusedBlob{}, errors.New(`Can not check for a blob with unknown digest`)
}
if err := digest.Validate(); err != nil {
if err := blobDigest.Validate(); err != nil {
return false, private.ReusedBlob{}, fmt.Errorf("Can not check for a blob with invalid digest: %w", err)
}
if options.TOCDigest != "" {
if err := options.TOCDigest.Validate(); err != nil {
return false, private.ReusedBlob{}, fmt.Errorf("Can not check for a blob with invalid digest: %w", err)
}
}
// Check if we have a wasn't-compressed layer in storage that's based on that blob.
// Check if we've already cached it in a file.
if size, ok := s.fileSizes[digest]; ok {
if size, ok := s.lockProtected.fileSizes[blobDigest]; ok {
// s.lockProtected.blobDiffIDs is set either by putBlobToPendingFile or in createNewLayer when creating the
// filenames/fileSizes entry.
return true, private.ReusedBlob{
Digest: digest,
Digest: blobDigest,
Size: size,
}, nil
}
// Check if we have a wasn't-compressed layer in storage that's based on that blob.
layers, err := s.imageRef.transport.store.LayersByUncompressedDigest(digest)
layers, err := s.imageRef.transport.store.LayersByUncompressedDigest(blobDigest)
if err != nil && !errors.Is(err, storage.ErrLayerUnknown) {
return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, digest, err)
return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, blobDigest, err)
}
if len(layers) > 0 {
// Save this for completeness.
s.blobDiffIDs[digest] = layers[0].UncompressedDigest
s.lockProtected.blobDiffIDs[blobDigest] = blobDigest
return true, private.ReusedBlob{
Digest: digest,
Digest: blobDigest,
Size: layers[0].UncompressedSize,
}, nil
}
// Check if we have a was-compressed layer in storage that's based on that blob.
layers, err = s.imageRef.transport.store.LayersByCompressedDigest(digest)
layers, err = s.imageRef.transport.store.LayersByCompressedDigest(blobDigest)
if err != nil && !errors.Is(err, storage.ErrLayerUnknown) {
return false, private.ReusedBlob{}, fmt.Errorf(`looking for compressed layers with digest %q: %w`, digest, err)
return false, private.ReusedBlob{}, fmt.Errorf(`looking for compressed layers with digest %q: %w`, blobDigest, err)
}
if len(layers) > 0 {
// Record the uncompressed value so that we can use it to calculate layer IDs.
s.blobDiffIDs[digest] = layers[0].UncompressedDigest
// LayersByCompressedDigest only finds layers which were created from a full layer blob, and extracting that
// always sets UncompressedDigest.
diffID := layers[0].UncompressedDigest
if diffID == "" {
return false, private.ReusedBlob{}, fmt.Errorf("internal error: compressed layer %q (for compressed digest %q) does not have an uncompressed digest", layers[0].ID, blobDigest.String())
}
s.lockProtected.blobDiffIDs[blobDigest] = diffID
return true, private.ReusedBlob{
Digest: digest,
Digest: blobDigest,
Size: layers[0].CompressedSize,
}, nil
}
@ -391,23 +439,23 @@ func (s *storageImageDestination) tryReusingBlobAsPending(digest digest.Digest,
// Because we must return the size, which is unknown for unavailable compressed blobs, the returned BlobInfo refers to the
// uncompressed layer, and that can happen only if options.CanSubstitute, or if the incoming manifest already specifies the size.
if options.CanSubstitute || size != -1 {
if uncompressedDigest := options.Cache.UncompressedDigest(digest); uncompressedDigest != "" && uncompressedDigest != digest {
if uncompressedDigest := options.Cache.UncompressedDigest(blobDigest); uncompressedDigest != "" && uncompressedDigest != blobDigest {
layers, err := s.imageRef.transport.store.LayersByUncompressedDigest(uncompressedDigest)
if err != nil && !errors.Is(err, storage.ErrLayerUnknown) {
return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, uncompressedDigest, err)
}
if len(layers) > 0 {
if size != -1 {
s.blobDiffIDs[digest] = layers[0].UncompressedDigest
s.lockProtected.blobDiffIDs[blobDigest] = uncompressedDigest
return true, private.ReusedBlob{
Digest: digest,
Digest: blobDigest,
Size: size,
}, nil
}
if !options.CanSubstitute {
return false, private.ReusedBlob{}, fmt.Errorf("Internal error: options.CanSubstitute was expected to be true for blob with digest %s", digest)
return false, private.ReusedBlob{}, fmt.Errorf("Internal error: options.CanSubstitute was expected to be true for blob with digest %s", blobDigest)
}
s.blobDiffIDs[uncompressedDigest] = layers[0].UncompressedDigest
s.lockProtected.blobDiffIDs[uncompressedDigest] = uncompressedDigest
return true, private.ReusedBlob{
Digest: uncompressedDigest,
Size: layers[0].UncompressedSize,
@ -416,6 +464,32 @@ func (s *storageImageDestination) tryReusingBlobAsPending(digest digest.Digest,
}
}
if options.TOCDigest != "" && options.LayerIndex != nil {
// Check if we have a chunked layer in storage with the same TOC digest.
layers, err := s.imageRef.transport.store.LayersByTOCDigest(options.TOCDigest)
if err != nil && !errors.Is(err, storage.ErrLayerUnknown) {
return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with TOC digest %q: %w`, options.TOCDigest, err)
}
if len(layers) > 0 {
if size != -1 {
s.lockProtected.indexToTOCDigest[*options.LayerIndex] = options.TOCDigest
return true, private.ReusedBlob{
Digest: blobDigest,
Size: size,
MatchedByTOCDigest: true,
}, nil
} else if options.CanSubstitute && layers[0].UncompressedDigest != "" {
s.lockProtected.indexToTOCDigest[*options.LayerIndex] = options.TOCDigest
return true, private.ReusedBlob{
Digest: layers[0].UncompressedDigest,
Size: layers[0].UncompressedSize,
MatchedByTOCDigest: true,
}, nil
}
}
}
// Nope, we don't have it.
return false, private.ReusedBlob{}, nil
}
@ -425,6 +499,8 @@ func (s *storageImageDestination) tryReusingBlobAsPending(digest digest.Digest,
// that since we don't have a recommendation, a random ID should be used if one needs
// to be allocated.
func (s *storageImageDestination) computeID(m manifest.Manifest) string {
// This is outside of the scope of HasThreadSafePutBlob, so we dont need to hold s.lock.
// Build the diffID list. We need the decompressed sums that we've been calculating to
// fill in the DiffIDs. It's expected (but not enforced by us) that the number of
// diffIDs corresponds to the number of non-EmptyLayer entries in the history.
@ -438,24 +514,59 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string {
continue
}
blobSum := m.FSLayers[i].BlobSum
diffID, ok := s.blobDiffIDs[blobSum]
diffID, ok := s.lockProtected.blobDiffIDs[blobSum]
if !ok {
// this can, in principle, legitimately happen when a layer is reused by TOC.
logrus.Infof("error looking up diffID for layer %q", blobSum.String())
return ""
}
diffIDs = append([]digest.Digest{diffID}, diffIDs...)
}
case *manifest.Schema2, *manifest.OCI1:
// We know the ID calculation for these formats doesn't actually use the diffIDs,
// so we don't need to populate the diffID list.
// We know the ID calculation doesn't actually use the diffIDs, so we don't need to populate
// the diffID list.
default:
return ""
}
id, err := m.ImageID(diffIDs)
// We want to use the same ID for “the same” images, but without risking unwanted sharing / malicious image corruption.
//
// Traditionally that means the same ~config digest, as computed by m.ImageID;
// but if we pull a layer by TOC, we verify the layer against neither the (compressed) blob digest in the manifest,
// nor against the configs RootFS.DiffIDs. We dont really want to do either, to allow partial layer pulls where we never see
// most of the data.
//
// So, if a layer is pulled by TOC (and we do validate against the TOC), the fact that we used the TOC, and the value of the TOC,
// must enter into the image ID computation.
// But for images where no TOC was used, continue to use IDs computed the traditional way, to maximize image reuse on upgrades,
// and to introduce the changed behavior only when partial pulls are used.
//
// Note that its not 100% guaranteed that an image pulled by TOC uses an OCI manifest; consider
// (skopeo copy --format v2s2 docker://…/zstd-chunked-image containers-storage:… ). So this is not happening only in the OCI case above.
ordinaryImageID, err := m.ImageID(diffIDs)
if err != nil {
return ""
}
return id
tocIDInput := ""
hasLayerPulledByTOC := false
for i := range m.LayerInfos() {
layerValue := "" // An empty string is not a valid digest, so this is unambiguous with the TOC case.
tocDigest, ok := s.lockProtected.indexToTOCDigest[i] // "" if not a TOC
if ok {
hasLayerPulledByTOC = true
layerValue = tocDigest.String()
}
tocIDInput += layerValue + "|" // "|" can not be present in a TOC digest, so this is an unambiguous separator.
}
if !hasLayerPulledByTOC {
return ordinaryImageID
}
// ordinaryImageID is a digest of a config, which is a JSON value.
// To avoid the risk of collisions, start the input with @ so that the input is not a valid JSON.
tocImageID := digest.FromString("@With TOC:" + tocIDInput).Hex()
logrus.Debugf("Ordinary storage image ID %s; a layer was looked up by TOC, so using image ID %s", ordinaryImageID, tocImageID)
return tocImageID
}
// getConfigBlob exists only to let us retrieve the configuration blob so that the manifest package can dig
@ -468,7 +579,7 @@ func (s *storageImageDestination) getConfigBlob(info types.BlobInfo) ([]byte, er
return nil, fmt.Errorf("invalid digest supplied when reading blob: %w", err)
}
// Assume it's a file, since we're only calling this from a place that expects to read files.
if filename, ok := s.filenames[info.Digest]; ok {
if filename, ok := s.lockProtected.filenames[info.Digest]; ok {
contents, err2 := os.ReadFile(filename)
if err2 != nil {
return nil, fmt.Errorf(`reading blob from file %q: %w`, filename, err2)
@ -502,23 +613,23 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo)
// caller is the "worker" routine committing layers. All other routines
// can continue pulling and queuing in layers.
s.lock.Lock()
s.indexToAddedLayerInfo[index] = info
s.lockProtected.indexToAddedLayerInfo[index] = info
// We're still waiting for at least one previous/parent layer to be
// committed, so there's nothing to do.
if index != s.currentIndex {
if index != s.lockProtected.currentIndex {
s.lock.Unlock()
return nil
}
for {
info, ok := s.indexToAddedLayerInfo[index]
info, ok := s.lockProtected.indexToAddedLayerInfo[index]
if !ok {
break
}
s.lock.Unlock()
// Note: commitLayer locks on-demand.
if err := s.commitLayer(index, info, -1); err != nil {
if stopQueue, err := s.commitLayer(index, info, -1); stopQueue || err != nil {
return err
}
s.lock.Lock()
@ -527,185 +638,337 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo)
// Set the index at the very end to make sure that only one routine
// enters stage 2).
s.currentIndex = index
s.lockProtected.currentIndex = index
s.lock.Unlock()
return nil
}
// singleLayerIDComponent returns a single layers the input to computing a layer (chain) ID,
// and an indication whether the input already has the shape of a layer ID.
// It returns ("", false) if the layer is not found at all (which should never happen)
func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDigest digest.Digest) (string, bool) {
s.lock.Lock()
defer s.lock.Unlock()
if d, found := s.lockProtected.indexToTOCDigest[layerIndex]; found {
return "@TOC=" + d.Hex(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous.
}
if d, found := s.lockProtected.blobDiffIDs[blobDigest]; found {
return d.Hex(), true // This looks like chain IDs, and it uses the traditional value.
}
return "", false
}
// commitLayer commits the specified layer with the given index to the storage.
// size can usually be -1; it can be provided if the layer is not known to be already present in blobDiffIDs.
//
// If the layer cannot be committed yet, the function returns (true, nil).
//
// Note that the previous layer is expected to already be committed.
//
// Caution: this function must be called without holding `s.lock`. Callers
// must guarantee that, at any given time, at most one goroutine may execute
// `commitLayer()`.
func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, size int64) error {
func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, size int64) (bool, error) {
// Already committed? Return early.
if _, alreadyCommitted := s.indexToStorageID[index]; alreadyCommitted {
return nil
return false, nil
}
// Start with an empty string or the previous layer ID. Note that
// `s.indexToStorageID` can only be accessed by *one* goroutine at any
// given time. Hence, we don't need to lock accesses.
var lastLayer string
if prev := s.indexToStorageID[index-1]; prev != nil {
lastLayer = *prev
var parentLayer string
if index != 0 {
prev, ok := s.indexToStorageID[index-1]
if !ok {
return false, fmt.Errorf("Internal error: commitLayer called with previous layer %d not committed yet", index-1)
}
parentLayer = prev
}
// Carry over the previous ID for empty non-base layers.
if info.emptyLayer {
s.indexToStorageID[index] = &lastLayer
return nil
s.indexToStorageID[index] = parentLayer
return false, nil
}
// Check if there's already a layer with the ID that we'd give to the result of applying
// this layer blob to its parent, if it has one, or the blob's hex value otherwise.
s.lock.Lock()
diffID, haveDiffID := s.blobDiffIDs[info.digest]
s.lock.Unlock()
if !haveDiffID {
// Check if it's elsewhere and the caller just forgot to pass it to us in a PutBlob(),
// or to even check if we had it.
// Use none.NoCache to avoid a repeated DiffID lookup in the BlobInfoCache; a caller
// The layerID refers either to the DiffID or the digest of the TOC.
layerIDComponent, layerIDComponentStandalone := s.singleLayerIDComponent(index, info.digest)
if layerIDComponent == "" {
// Check if it's elsewhere and the caller just forgot to pass it to us in a PutBlob() / TryReusingBlob() / …
//
// Use none.NoCache to avoid a repeated DiffID lookup in the BlobInfoCache: a caller
// that relies on using a blob digest that has never been seen by the store had better call
// TryReusingBlob; not calling PutBlob already violates the documented API, so theres only
// so far we are going to accommodate that (if we should be doing that at all).
logrus.Debugf("looking for diffID for blob %+v", info.digest)
//
// We are also ignoring lookups by TOC, and other non-trivial situations.
// Those can only happen using the c/image/internal/private API,
// so those internal callers should be fixed to follow the API instead of expanding this fallback.
logrus.Debugf("looking for diffID for blob=%+v", info.digest)
// Use tryReusingBlobAsPending, not the top-level TryReusingBlobWithOptions, to prevent recursion via queueOrCommit.
has, _, err := s.tryReusingBlobAsPending(info.digest, size, &private.TryReusingBlobOptions{
Cache: none.NoCache,
CanSubstitute: false,
})
if err != nil {
return fmt.Errorf("checking for a layer based on blob %q: %w", info.digest.String(), err)
return false, fmt.Errorf("checking for a layer based on blob %q: %w", info.digest.String(), err)
}
if !has {
return fmt.Errorf("error determining uncompressed digest for blob %q", info.digest.String())
return false, fmt.Errorf("error determining uncompressed digest for blob %q", info.digest.String())
}
diffID, haveDiffID = s.blobDiffIDs[info.digest]
if !haveDiffID {
return fmt.Errorf("we have blob %q, but don't know its uncompressed digest", info.digest.String())
layerIDComponent, layerIDComponentStandalone = s.singleLayerIDComponent(index, info.digest)
if layerIDComponent == "" {
return false, fmt.Errorf("we have blob %q, but don't know its layer ID", info.digest.String())
}
}
id := diffID.Hex()
if lastLayer != "" {
id = digest.Canonical.FromBytes([]byte(lastLayer + "+" + diffID.Hex())).Hex()
id := layerIDComponent
if !layerIDComponentStandalone || parentLayer != "" {
id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Hex()
}
if layer, err2 := s.imageRef.transport.store.Layer(id); layer != nil && err2 == nil {
// There's already a layer that should have the right contents, just reuse it.
lastLayer = layer.ID
s.indexToStorageID[index] = &lastLayer
return nil
s.indexToStorageID[index] = layer.ID
return false, nil
}
s.lock.Lock()
diffOutput, ok := s.diffOutputs[info.digest]
s.lock.Unlock()
if ok {
layer, err := s.imageRef.transport.store.CreateLayer(id, lastLayer, nil, "", false, nil)
if err != nil {
return err
}
// FIXME: what to do with the uncompressed digest?
diffOutput.UncompressedDigest = info.digest
if err := s.imageRef.transport.store.ApplyDiffFromStagingDirectory(layer.ID, diffOutput.Target, diffOutput, nil); err != nil {
_ = s.imageRef.transport.store.Delete(layer.ID)
return err
}
s.indexToStorageID[index] = &layer.ID
return nil
layer, err := s.createNewLayer(index, info.digest, parentLayer, id)
if err != nil {
return false, err
}
if layer == nil {
return true, nil
}
s.indexToStorageID[index] = layer.ID
return false, nil
}
// createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be "").
// If the layer cannot be committed yet, the function returns (nil, nil).
func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string) (*storage.Layer, error) {
s.lock.Lock()
al, ok := s.blobAdditionalLayer[info.digest]
diffOutput, ok := s.lockProtected.diffOutputs[index]
s.lock.Unlock()
if ok {
layer, err := al.PutAs(id, lastLayer, nil)
var untrustedUncompressedDigest digest.Digest
if diffOutput.UncompressedDigest == "" {
d, err := s.untrustedLayerDiffID(index)
if err != nil {
return nil, err
}
if d == "" {
logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID)
return nil, nil
}
untrustedUncompressedDigest = d
}
flags := make(map[string]interface{})
if untrustedUncompressedDigest != "" {
flags[expectedLayerDiffIDFlag] = untrustedUncompressedDigest
logrus.Debugf("Setting uncompressed digest to %q for layer %q", untrustedUncompressedDigest, newLayerID)
}
args := storage.ApplyStagedLayerOptions{
ID: newLayerID,
ParentLayer: parentLayer,
DiffOutput: diffOutput,
DiffOptions: &graphdriver.ApplyDiffWithDifferOpts{
Flags: flags,
},
}
layer, err := s.imageRef.transport.store.ApplyStagedLayer(args)
if err != nil && !errors.Is(err, storage.ErrDuplicateID) {
return fmt.Errorf("failed to put layer from digest and labels: %w", err)
return nil, fmt.Errorf("failed to put layer using a partial pull: %w", err)
}
lastLayer = layer.ID
s.indexToStorageID[index] = &lastLayer
return nil
return layer, nil
}
s.lock.Lock()
al, ok := s.lockProtected.blobAdditionalLayer[layerDigest]
s.lock.Unlock()
if ok {
layer, err := al.PutAs(newLayerID, parentLayer, nil)
if err != nil && !errors.Is(err, storage.ErrDuplicateID) {
return nil, fmt.Errorf("failed to put layer from digest and labels: %w", err)
}
return layer, nil
}
// Check if we previously cached a file with that blob's contents. If we didn't,
// then we need to read the desired contents from a layer.
var trustedUncompressedDigest, trustedOriginalDigest digest.Digest // For storage.LayerOptions
s.lock.Lock()
filename, ok := s.filenames[info.digest]
tocDigest := s.lockProtected.indexToTOCDigest[index] // "" if not set
optionalDiffID := s.lockProtected.blobDiffIDs[layerDigest] // "" if not set
filename, gotFilename := s.lockProtected.filenames[layerDigest]
s.lock.Unlock()
if !ok {
// Try to find the layer with contents matching that blobsum.
layer := ""
layers, err2 := s.imageRef.transport.store.LayersByUncompressedDigest(diffID)
if err2 == nil && len(layers) > 0 {
layer = layers[0].ID
} else {
layers, err2 = s.imageRef.transport.store.LayersByCompressedDigest(info.digest)
if gotFilename && tocDigest == "" {
// If tocDigest != "", if we now happen to find a layerDigest match, the newLayerID has already been computed as TOC-based,
// and we don't know the relationship of the layerDigest and TOC digest.
// We could recompute newLayerID to be DiffID-based and use the file, but such a within-image layer
// reuse is expected to be pretty rare; instead, ignore the unexpected file match and proceed to the
// originally-planned TOC match.
// Because tocDigest == "", optionaldiffID must have been set; and even if it werent, PutLayer will recompute the digest from the stream.
trustedUncompressedDigest = optionalDiffID
trustedOriginalDigest = layerDigest // The code setting .filenames[layerDigest] is responsible for the contents matching.
} else {
// Try to find the layer with contents matching the data we use.
var layer *storage.Layer // = nil
if tocDigest != "" {
layers, err2 := s.imageRef.transport.store.LayersByTOCDigest(tocDigest)
if err2 == nil && len(layers) > 0 {
layer = layers[0].ID
layer = &layers[0]
} else {
return nil, fmt.Errorf("locating layer for TOC digest %q: %w", tocDigest, err2)
}
} else {
// Because tocDigest == "", optionaldiffID must have been set
layers, err2 := s.imageRef.transport.store.LayersByUncompressedDigest(optionalDiffID)
if err2 == nil && len(layers) > 0 {
layer = &layers[0]
} else {
layers, err2 = s.imageRef.transport.store.LayersByCompressedDigest(layerDigest)
if err2 == nil && len(layers) > 0 {
layer = &layers[0]
}
}
if layer == nil {
return nil, fmt.Errorf("locating layer for blob %q: %w", layerDigest, err2)
}
}
if layer == "" {
return fmt.Errorf("locating layer for blob %q: %w", info.digest, err2)
}
// Read the layer's contents.
noCompression := archive.Uncompressed
diffOptions := &storage.DiffOptions{
Compression: &noCompression,
}
diff, err2 := s.imageRef.transport.store.Diff("", layer, diffOptions)
diff, err2 := s.imageRef.transport.store.Diff("", layer.ID, diffOptions)
if err2 != nil {
return fmt.Errorf("reading layer %q for blob %q: %w", layer, info.digest, err2)
return nil, fmt.Errorf("reading layer %q for blob %q: %w", layer.ID, layerDigest, err2)
}
// Copy the layer diff to a file. Diff() takes a lock that it holds
// until the ReadCloser that it returns is closed, and PutLayer() wants
// the same lock, so the diff can't just be directly streamed from one
// to the other.
filename = s.computeNextBlobCacheFile()
file, err := os.OpenFile(filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY|os.O_EXCL, 0600)
file, err := os.OpenFile(filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY|os.O_EXCL, 0o600)
if err != nil {
diff.Close()
return fmt.Errorf("creating temporary file %q: %w", filename, err)
return nil, fmt.Errorf("creating temporary file %q: %w", filename, err)
}
// Copy the data to the file.
// TODO: This can take quite some time, and should ideally be cancellable using
// ctx.Done().
_, err = io.Copy(file, diff)
fileSize, err := io.Copy(file, diff)
diff.Close()
file.Close()
if err != nil {
return fmt.Errorf("storing blob to file %q: %w", filename, err)
return nil, fmt.Errorf("storing blob to file %q: %w", filename, err)
}
if optionalDiffID == "" && layer.UncompressedDigest != "" {
optionalDiffID = layer.UncompressedDigest
}
// The stream we have is uncompressed, this matches contents of the stream.
// If tocDigest != "", trustedUncompressedDigest might still be ""; in that case PutLayer will compute the value from the stream.
trustedUncompressedDigest = optionalDiffID
// FIXME? trustedOriginalDigest could be set to layerDigest IF tocDigest == "" (otherwise layerDigest is untrusted).
// But for c/storage to reasonably use it (as a CompressedDigest value), we should also ensure the CompressedSize of the created
// layer is correct, and the API does not currently make it possible (.CompressedSize is set from the input stream).
//
// We can legitimately set storage.LayerOptions.OriginalDigest to "",
// but that would just result in PutLayer computing the digest of the input stream == optionalDiffID.
// So, instead, set .OriginalDigest to the value we know already, to avoid that digest computation.
trustedOriginalDigest = optionalDiffID
// Allow using the already-collected layer contents without extracting the layer again.
//
// This only matches against the uncompressed digest.
// We dont have the original compressed data here to trivially set filenames[layerDigest].
// In particular we cant achieve the correct Layer.CompressedSize value with the current c/storage API.
// Within-image layer reuse is probably very rare, for now we prefer to avoid that complexity.
if trustedUncompressedDigest != "" {
s.lock.Lock()
s.lockProtected.blobDiffIDs[trustedUncompressedDigest] = trustedUncompressedDigest
s.lockProtected.filenames[trustedUncompressedDigest] = filename
s.lockProtected.fileSizes[trustedUncompressedDigest] = fileSize
s.lock.Unlock()
}
// Make sure that we can find this file later, should we need the layer's
// contents again.
s.lock.Lock()
s.filenames[info.digest] = filename
s.lock.Unlock()
}
// Read the cached blob and use it as a diff.
file, err := os.Open(filename)
if err != nil {
return fmt.Errorf("opening file %q: %w", filename, err)
return nil, fmt.Errorf("opening file %q: %w", filename, err)
}
defer file.Close()
// Build the new layer using the diff, regardless of where it came from.
// TODO: This can take quite some time, and should ideally be cancellable using ctx.Done().
layer, _, err := s.imageRef.transport.store.PutLayer(id, lastLayer, nil, "", false, &storage.LayerOptions{
OriginalDigest: info.digest,
UncompressedDigest: diffID,
layer, _, err := s.imageRef.transport.store.PutLayer(newLayerID, parentLayer, nil, "", false, &storage.LayerOptions{
OriginalDigest: trustedOriginalDigest,
UncompressedDigest: trustedUncompressedDigest,
}, file)
if err != nil && !errors.Is(err, storage.ErrDuplicateID) {
return fmt.Errorf("adding layer with blob %q: %w", info.digest, err)
return nil, fmt.Errorf("adding layer with blob %q: %w", layerDigest, err)
}
return layer, nil
}
// untrustedLayerDiffID returns a DiffID value for layerIndex from the images config.
// If the value is not yet available (but it can be available after s.manifets is set), it returns ("", nil).
// WARNING: We dont validate the DiffID value against the layer contents; it must not be used for any deduplication.
func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.Digest, error) {
// At this point, we are either inside the multi-threaded scope of HasThreadSafePutBlob, and
// nothing is writing to s.manifest yet, or PutManifest has been called and s.manifest != nil.
// Either way this function does not need the protection of s.lock.
if s.manifest == nil {
logrus.Debugf("Skipping commit for layer %d, manifest not yet available", layerIndex)
return "", nil
}
s.indexToStorageID[index] = &layer.ID
return nil
if s.untrustedDiffIDValues == nil {
mt := manifest.GuessMIMEType(s.manifest)
if mt != imgspecv1.MediaTypeImageManifest {
// We could, in principle, build an ImageSource, support arbitrary image formats using image.FromUnparsedImage,
// and then use types.Image.OCIConfig so that we can parse the image.
//
// In practice, this should, right now, only matter for pulls of OCI images (this code path implies that a layer has annotation),
// while converting to a non-OCI formats, using a manual (skopeo copy) or something similar, not (podman pull).
// So it is not implemented yet.
return "", fmt.Errorf("determining DiffID for manifest type %q is not yet supported", mt)
}
man, err := manifest.FromBlob(s.manifest, mt)
if err != nil {
return "", fmt.Errorf("parsing manifest: %w", err)
}
cb, err := s.getConfigBlob(man.ConfigInfo())
if err != nil {
return "", err
}
// retrieve the expected uncompressed digest from the config blob.
configOCI := &imgspecv1.Image{}
if err := json.Unmarshal(cb, configOCI); err != nil {
return "", err
}
s.untrustedDiffIDValues = slices.Clone(configOCI.RootFS.DiffIDs)
if s.untrustedDiffIDValues == nil { // Unlikely but possible in theory…
s.untrustedDiffIDValues = []digest.Digest{}
}
}
if layerIndex >= len(s.untrustedDiffIDValues) {
return "", fmt.Errorf("image config has only %d DiffID values, but a layer with index %d exists", len(s.untrustedDiffIDValues), layerIndex)
}
return s.untrustedDiffIDValues[layerIndex], nil
}
// Commit marks the process of storing the image as successful and asks for the image to be persisted.
@ -716,6 +979,8 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si
// - Uploaded data MAY be visible to others before Commit() is called
// - Uploaded data MAY be removed or MAY remain around if Close() is called without Commit() (i.e. rollback is allowed but not guaranteed)
func (s *storageImageDestination) Commit(ctx context.Context, unparsedToplevel types.UnparsedImage) error {
// This function is outside of the scope of HasThreadSafePutBlob, so we dont need to hold s.lock.
if len(s.manifest) == 0 {
return errors.New("Internal error: storageImageDestination.Commit() called without PutManifest()")
}
@ -752,20 +1017,22 @@ func (s *storageImageDestination) Commit(ctx context.Context, unparsedToplevel t
// Extract, commit, or find the layers.
for i, blob := range layerBlobs {
if err := s.commitLayer(i, addedLayerInfo{
if stopQueue, err := s.commitLayer(i, addedLayerInfo{
digest: blob.Digest,
emptyLayer: blob.EmptyLayer,
}, blob.Size); err != nil {
return err
} else if stopQueue {
return fmt.Errorf("Internal error: storageImageDestination.Commit(): commitLayer() not ready to commit for layer %q", blob.Digest)
}
}
var lastLayer string
if len(layerBlobs) > 0 { // Can happen when using caches
prev := s.indexToStorageID[len(layerBlobs)-1]
if prev == nil {
if len(layerBlobs) > 0 { // Zero-layer images rarely make sense, but it is technically possible, and may happen for non-image artifacts.
prev, ok := s.indexToStorageID[len(layerBlobs)-1]
if !ok {
return fmt.Errorf("Internal error: storageImageDestination.Commit(): previous layer %d hasn't been committed (lastLayer == nil)", len(layerBlobs)-1)
}
lastLayer = *prev
lastLayer = prev
}
// If one of those blobs was a configuration blob, then we can try to dig out the date when the image
@ -779,14 +1046,14 @@ func (s *storageImageDestination) Commit(ctx context.Context, unparsedToplevel t
// Set up to save the non-layer blobs as data items. Since we only share layers, they should all be in files, so
// we just need to screen out the ones that are actually layers to get the list of non-layers.
dataBlobs := set.New[digest.Digest]()
for blob := range s.filenames {
for blob := range s.lockProtected.filenames {
dataBlobs.Add(blob)
}
for _, layerBlob := range layerBlobs {
dataBlobs.Delete(layerBlob.Digest)
}
for _, blob := range dataBlobs.Values() {
v, err := os.ReadFile(s.filenames[blob])
v, err := os.ReadFile(s.lockProtected.filenames[blob])
if err != nil {
return fmt.Errorf("copying non-layer blob %q to image: %w", blob, err)
}
@ -839,7 +1106,7 @@ func (s *storageImageDestination) Commit(ctx context.Context, unparsedToplevel t
}
// Set up to save our metadata.
metadata, err := json.Marshal(s)
metadata, err := json.Marshal(s.metadata)
if err != nil {
return fmt.Errorf("encoding metadata for image: %w", err)
}
@ -944,7 +1211,7 @@ func (s *storageImageDestination) PutSignaturesWithFormat(ctx context.Context, s
}
if instanceDigest == nil {
s.signatures = sigblob
s.SignatureSizes = sizes
s.metadata.SignatureSizes = sizes
if len(s.manifest) > 0 {
manifestDigest := s.manifestDigest
instanceDigest = &manifestDigest
@ -952,7 +1219,7 @@ func (s *storageImageDestination) PutSignaturesWithFormat(ctx context.Context, s
}
if instanceDigest != nil {
s.signatureses[*instanceDigest] = sigblob
s.SignaturesSizes[*instanceDigest] = sizes
s.metadata.SignaturesSizes[*instanceDigest] = sizes
}
return nil
}

View file

@ -18,11 +18,6 @@ var (
ErrNoSuchImage = storage.ErrNotAnImage
)
type storageImageCloser struct {
types.ImageCloser
size int64
}
// manifestBigDataKey returns a key suitable for recording a manifest with the specified digest using storage.Store.ImageBigData and related functions.
// If a specific manifest digest is explicitly requested by the user, the key returned by this function should be used preferably;
// for compatibility, if a manifest is not available under this key, check also storage.ImageDigestBigDataKey
@ -36,6 +31,17 @@ func signatureBigDataKey(digest digest.Digest) string {
return "signature-" + digest.Encoded()
}
// storageImageMetadata is stored, as JSON, in storage.Image.Metadata
type storageImageMetadata struct {
SignatureSizes []int `json:"signature-sizes,omitempty"` // List of sizes of each signature slice
SignaturesSizes map[digest.Digest][]int `json:"signatures-sizes,omitempty"` // Sizes of each manifest's signature slice
}
type storageImageCloser struct {
types.ImageCloser
size int64
}
// Size() returns the previously-computed size of the image, with no error.
func (s *storageImageCloser) Size() (int64, error) {
return s.size, nil

View file

@ -34,16 +34,30 @@ type storageImageSource struct {
impl.PropertyMethodsInitialize
stubs.NoGetBlobAtInitialize
imageRef storageReference
image *storage.Image
systemContext *types.SystemContext // SystemContext used in GetBlob() to create temporary files
layerPosition map[digest.Digest]int // Where we are in reading a blob's layers
cachedManifest []byte // A cached copy of the manifest, if already known, or nil
getBlobMutex sync.Mutex // Mutex to sync state for parallel GetBlob executions
SignatureSizes []int `json:"signature-sizes,omitempty"` // List of sizes of each signature slice
SignaturesSizes map[digest.Digest][]int `json:"signatures-sizes,omitempty"` // List of sizes of each signature slice
imageRef storageReference
image *storage.Image
systemContext *types.SystemContext // SystemContext used in GetBlob() to create temporary files
metadata storageImageMetadata
cachedManifest []byte // A cached copy of the manifest, if already known, or nil
getBlobMutex sync.Mutex // Mutex to sync state for parallel GetBlob executions
getBlobMutexProtected getBlobMutexProtected
}
// getBlobMutexProtected contains storageImageSource data protected by getBlobMutex.
type getBlobMutexProtected struct {
// digestToLayerID is a lookup map from a possibly-untrusted uncompressed layer digest (as returned by LayerInfosForCopy) to the
// layer ID in the store.
digestToLayerID map[digest.Digest]string
// layerPosition stores where we are in reading a blob's layers
layerPosition map[digest.Digest]int
}
// expectedLayerDiffIDFlag is a per-layer flag containing an UNTRUSTED uncompressed digest of the layer.
// It is set when pulling a layer by TOC; later, this value is used with digestToLayerID
// to allow identifying the layer — and the consumer is expected to verify the blob returned by GetBlob against the digest.
const expectedLayerDiffIDFlag = "expected-layer-diffid"
// newImageSource sets up an image for reading.
func newImageSource(sys *types.SystemContext, imageRef storageReference) (*storageImageSource, error) {
// First, locate the image.
@ -59,16 +73,21 @@ func newImageSource(sys *types.SystemContext, imageRef storageReference) (*stora
}),
NoGetBlobAtInitialize: stubs.NoGetBlobAt(imageRef),
imageRef: imageRef,
systemContext: sys,
image: img,
layerPosition: make(map[digest.Digest]int),
SignatureSizes: []int{},
SignaturesSizes: make(map[digest.Digest][]int),
imageRef: imageRef,
systemContext: sys,
image: img,
metadata: storageImageMetadata{
SignatureSizes: []int{},
SignaturesSizes: make(map[digest.Digest][]int),
},
getBlobMutexProtected: getBlobMutexProtected{
digestToLayerID: make(map[digest.Digest]string),
layerPosition: make(map[digest.Digest]int),
},
}
image.Compat = impl.AddCompat(image)
if img.Metadata != "" {
if err := json.Unmarshal([]byte(img.Metadata), image); err != nil {
if err := json.Unmarshal([]byte(img.Metadata), &image.metadata); err != nil {
return nil, fmt.Errorf("decoding metadata for source image: %w", err)
}
}
@ -91,6 +110,7 @@ func (s *storageImageSource) Close() error {
func (s *storageImageSource) GetBlob(ctx context.Context, info types.BlobInfo, cache types.BlobInfoCache) (rc io.ReadCloser, n int64, err error) {
// We need a valid digest value.
digest := info.Digest
err = digest.Validate()
if err != nil {
return nil, 0, err
@ -100,10 +120,25 @@ func (s *storageImageSource) GetBlob(ctx context.Context, info types.BlobInfo, c
return io.NopCloser(bytes.NewReader(image.GzippedEmptyLayer)), int64(len(image.GzippedEmptyLayer)), nil
}
// Check if the blob corresponds to a diff that was used to initialize any layers. Our
// callers should try to retrieve layers using their uncompressed digests, so no need to
// check if they're using one of the compressed digests, which we can't reproduce anyway.
layers, _ := s.imageRef.transport.store.LayersByUncompressedDigest(digest)
var layers []storage.Layer
// This lookup path is strictly necessary for layers identified by TOC digest
// (where LayersByUncompressedDigest might not find our layer);
// for other layers it is an optimization to avoid the cost of the LayersByUncompressedDigest call.
s.getBlobMutex.Lock()
layerID, found := s.getBlobMutexProtected.digestToLayerID[digest]
s.getBlobMutex.Unlock()
if found {
if layer, err := s.imageRef.transport.store.Layer(layerID); err == nil {
layers = []storage.Layer{*layer}
}
} else {
// Check if the blob corresponds to a diff that was used to initialize any layers. Our
// callers should try to retrieve layers using their uncompressed digests, so no need to
// check if they're using one of the compressed digests, which we can't reproduce anyway.
layers, _ = s.imageRef.transport.store.LayersByUncompressedDigest(digest)
}
// If it's not a layer, then it must be a data item.
if len(layers) == 0 {
@ -174,8 +209,8 @@ func (s *storageImageSource) getBlobAndLayerID(digest digest.Digest, layers []st
// which claim to have the same contents, that we actually do have multiple layers, otherwise we could
// just go ahead and use the first one every time.
s.getBlobMutex.Lock()
i := s.layerPosition[digest]
s.layerPosition[digest] = i + 1
i := s.getBlobMutexProtected.layerPosition[digest]
s.getBlobMutexProtected.layerPosition[digest] = i + 1
s.getBlobMutex.Unlock()
if len(layers) > 0 {
layer = layers[i%len(layers)]
@ -267,14 +302,35 @@ func (s *storageImageSource) LayerInfosForCopy(ctx context.Context, instanceDige
if err != nil {
return nil, fmt.Errorf("reading layer %q in image %q: %w", layerID, s.image.ID, err)
}
if layer.UncompressedDigest == "" {
return nil, fmt.Errorf("uncompressed digest for layer %q is unknown", layerID)
}
if layer.UncompressedSize < 0 {
return nil, fmt.Errorf("uncompressed size for layer %q is unknown", layerID)
}
blobDigest := layer.UncompressedDigest
if blobDigest == "" {
if layer.TOCDigest == "" {
return nil, fmt.Errorf("uncompressed digest and TOC digest for layer %q is unknown", layerID)
}
if layer.Flags == nil || layer.Flags[expectedLayerDiffIDFlag] == nil {
return nil, fmt.Errorf("TOC digest %q for layer %q is present but %q flag is not set", layer.TOCDigest, layerID, expectedLayerDiffIDFlag)
}
expectedDigest, ok := layer.Flags[expectedLayerDiffIDFlag].(string)
if !ok {
return nil, fmt.Errorf("TOC digest %q for layer %q is present but %q flag is not a string", layer.TOCDigest, layerID, expectedLayerDiffIDFlag)
}
// If the layer is stored by its TOC, report the expected diffID as the layer Digest;
// the generic code is responsible for validating the digest.
// We can locate the layer without further c/storage help using s.getBlobMutexProtected.digestToLayerID.
blobDigest, err = digest.Parse(expectedDigest)
if err != nil {
return nil, fmt.Errorf("parsing expected diffID %q for layer %q: %w", expectedDigest, layerID, err)
}
}
s.getBlobMutex.Lock()
s.getBlobMutexProtected.digestToLayerID[blobDigest] = layer.ID
s.getBlobMutex.Unlock()
blobInfo := types.BlobInfo{
Digest: layer.UncompressedDigest,
Digest: blobDigest,
Size: layer.UncompressedSize,
MediaType: uncompressedLayerType,
}
@ -324,11 +380,11 @@ func buildLayerInfosForCopy(manifestInfos []manifest.LayerInfo, physicalInfos []
func (s *storageImageSource) GetSignaturesWithFormat(ctx context.Context, instanceDigest *digest.Digest) ([]signature.Signature, error) {
var offset int
signatureBlobs := []byte{}
signatureSizes := s.SignatureSizes
signatureSizes := s.metadata.SignatureSizes
key := "signatures"
instance := "default instance"
if instanceDigest != nil {
signatureSizes = s.SignaturesSizes[*instanceDigest]
signatureSizes = s.metadata.SignaturesSizes[*instanceDigest]
key = signatureBigDataKey(*instanceDigest)
instance = instanceDigest.Encoded()
}
@ -374,7 +430,7 @@ func (s *storageImageSource) getSize() (int64, error) {
sum += bigSize
}
// Add the signature sizes.
for _, sigSize := range s.SignatureSizes {
for _, sigSize := range s.metadata.SignatureSizes {
sum += int64(sigSize)
}
// Walk the layer list.
@ -384,7 +440,7 @@ func (s *storageImageSource) getSize() (int64, error) {
if err != nil {
return -1, err
}
if layer.UncompressedDigest == "" || layer.UncompressedSize < 0 {
if (layer.TOCDigest == "" && layer.UncompressedDigest == "") || layer.UncompressedSize < 0 {
return -1, fmt.Errorf("size for layer %q is unknown, failing getSize()", layerID)
}
sum += layer.UncompressedSize

View file

@ -213,7 +213,7 @@ func (s *storageTransport) GetStore() (storage.Store, error) {
// Return the transport's previously-set store. If we don't have one
// of those, initialize one now.
if s.store == nil {
options, err := storage.DefaultStoreOptionsAutoDetectUID()
options, err := storage.DefaultStoreOptions()
if err != nil {
return nil, err
}

View file

@ -135,8 +135,8 @@ type BlobInfo struct {
// CompressionOperation is used in Image.UpdateLayerInfos to instruct
// whether the original layer's "compressed or not" should be preserved,
// possibly while changing the compression algorithm from one to another,
// or if it should be compressed or decompressed. The field defaults to
// preserve the original layer's compressedness.
// or if it should be changed to compressed or decompressed.
// The field defaults to preserve the original layer's compressedness.
// TODO: To remove together with CryptoOperation in re-design to remove
// field out of BlobInfo.
CompressionOperation LayerCompression

View file

@ -6,9 +6,9 @@ const (
// VersionMajor is for an API incompatible changes
VersionMajor = 5
// VersionMinor is for functionality in a backwards-compatible manner
VersionMinor = 29
VersionMinor = 30
// VersionPatch is for backwards-compatible bug fixes
VersionPatch = 2
VersionPatch = 0
// VersionDev indicates development branch. Releases will be empty string.
VersionDev = ""

View file

@ -17,13 +17,13 @@ env:
####
#### Cache-image names to test with (double-quotes around names are critical)
###
FEDORA_NAME: "fedora-39ß"
FEDORA_NAME: "fedora-39"
DEBIAN_NAME: "debian-13"
# GCE project where images live
IMAGE_PROJECT: "libpod-218412"
# VM Image built in containers/automation_images
IMAGE_SUFFIX: "c20231004t194547z-f39f38d13"
IMAGE_SUFFIX: "c20240102t155643z-f39f38d13"
FEDORA_CACHE_IMAGE_NAME: "fedora-${IMAGE_SUFFIX}"
DEBIAN_CACHE_IMAGE_NAME: "debian-${IMAGE_SUFFIX}"
@ -167,7 +167,7 @@ vendor_task:
cross_task:
container:
image: golang:1.19
image: golang:1.20
build_script: make cross
@ -181,6 +181,6 @@ success_task:
- vendor
- cross
container:
image: golang:1.19
image: golang:1.20
clone_script: 'mkdir -p "$CIRRUS_WORKING_DIR"' # Source code not needed
script: /bin/true

View file

@ -41,7 +41,7 @@ containers-storage: ## build using gc on the host
$(GO) build -compiler gc $(BUILDFLAGS) ./cmd/containers-storage
codespell:
codespell -S Makefile,build,buildah,buildah.spec,imgtype,copy,AUTHORS,bin,vendor,.git,go.sum,CHANGELOG.md,changelog.txt,seccomp.json,.cirrus.yml,"*.xz,*.gz,*.tar,*.tgz,*ico,*.png,*.1,*.5,*.orig,*.rej" -L worl,flate,uint,iff,od,ERRO -w
codespell -S Makefile,build,buildah,buildah.spec,imgtype,copy,AUTHORS,bin,vendor,.git,go.sum,CHANGELOG.md,changelog.txt,seccomp.json,.cirrus.yml,"*.xz,*.gz,*.tar,*.tgz,*ico,*.png,*.1,*.5,*.orig,*.rej" -L plack,worl,flate,uint,iff,od,ERRO -w
binary local-binary: containers-storage

View file

@ -1 +1 @@
1.51.0
1.53.0

View file

@ -73,6 +73,13 @@ type ApplyDiffOpts struct {
ForceMask *os.FileMode
}
// ApplyDiffWithDifferOpts contains optional arguments for ApplyDiffWithDiffer methods.
type ApplyDiffWithDifferOpts struct {
ApplyDiffOpts
Flags map[string]interface{}
}
// InitFunc initializes the storage driver.
type InitFunc func(homedir string, options Options) (Driver, error)
@ -189,6 +196,8 @@ type DriverWithDifferOutput struct {
BigData map[string][]byte
TarSplit []byte
TOCDigest digest.Digest
// RootDirMode is the mode of the root directory of the layer, if specified.
RootDirMode *os.FileMode
// Artifacts is a collection of additional artifacts
// generated by the differ that the storage driver can use.
Artifacts map[string]interface{}
@ -205,10 +214,26 @@ const (
DifferOutputFormatFlat
)
type DifferFsVerity int
const (
// DifferFsVerityDisabled means no fs-verity is used
DifferFsVerityDisabled = iota
// DifferFsVerityEnabled means fs-verity is used when supported
DifferFsVerityEnabled
// DifferFsVerityRequired means fs-verity is required
DifferFsVerityRequired
)
// DifferOptions overrides how the differ work
type DifferOptions struct {
// Format defines the destination directory layout format
Format DifferOutputFormat
// UseFsVerity defines whether fs-verity is used
UseFsVerity DifferFsVerity
}
// Differ defines the interface for using a custom differ.
@ -223,9 +248,9 @@ type DriverWithDiffer interface {
Driver
// ApplyDiffWithDiffer applies the changes using the callback function.
// If id is empty, then a staging directory is created. The staging directory is guaranteed to be usable with ApplyDiffFromStagingDirectory.
ApplyDiffWithDiffer(id, parent string, options *ApplyDiffOpts, differ Differ) (output DriverWithDifferOutput, err error)
// ApplyDiffFromStagingDirectory applies the changes using the specified staging directory.
ApplyDiffFromStagingDirectory(id, parent, stagingDirectory string, diffOutput *DriverWithDifferOutput, options *ApplyDiffOpts) error
ApplyDiffWithDiffer(id, parent string, options *ApplyDiffWithDifferOpts, differ Differ) (output DriverWithDifferOutput, err error)
// ApplyDiffFromStagingDirectory applies the changes using the diffOutput target directory.
ApplyDiffFromStagingDirectory(id, parent string, diffOutput *DriverWithDifferOutput, options *ApplyDiffWithDifferOpts) error
// CleanupStagingDirectory cleanups the staging directory. It can be used to cleanup the staging directory on errors
CleanupStagingDirectory(stagingDirectory string) error
// DifferTarget gets the location where files are stored for the layer.

View file

@ -1,5 +1,5 @@
//go:build linux && composefs && cgo
// +build linux,composefs,cgo
//go:build linux && cgo
// +build linux,cgo
package overlay
@ -7,15 +7,13 @@ import (
"encoding/binary"
"errors"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
"sync"
"syscall"
"unsafe"
"github.com/containers/storage/pkg/chunked/dump"
"github.com/containers/storage/pkg/fsverity"
"github.com/containers/storage/pkg/loopback"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
@ -34,77 +32,6 @@ func getComposeFsHelper() (string, error) {
return composeFsHelperPath, composeFsHelperErr
}
func composeFsSupported() bool {
_, err := getComposeFsHelper()
return err == nil
}
func enableVerity(description string, fd int) error {
enableArg := unix.FsverityEnableArg{
Version: 1,
Hash_algorithm: unix.FS_VERITY_HASH_ALG_SHA256,
Block_size: 4096,
}
_, _, e1 := syscall.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.FS_IOC_ENABLE_VERITY), uintptr(unsafe.Pointer(&enableArg)))
if e1 != 0 && !errors.Is(e1, unix.EEXIST) {
return fmt.Errorf("failed to enable verity for %q: %w", description, e1)
}
return nil
}
type verityDigest struct {
Fsv unix.FsverityDigest
Buf [64]byte
}
func measureVerity(description string, fd int) (string, error) {
var digest verityDigest
digest.Fsv.Size = 64
_, _, e1 := syscall.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.FS_IOC_MEASURE_VERITY), uintptr(unsafe.Pointer(&digest)))
if e1 != 0 {
return "", fmt.Errorf("failed to measure verity for %q: %w", description, e1)
}
return fmt.Sprintf("%x", digest.Buf[:digest.Fsv.Size]), nil
}
func enableVerityRecursive(root string) (map[string]string, error) {
digests := make(map[string]string)
walkFn := func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if !d.Type().IsRegular() {
return nil
}
f, err := os.Open(path)
if err != nil {
return err
}
defer f.Close()
if err := enableVerity(path, int(f.Fd())); err != nil {
return err
}
verity, err := measureVerity(path, int(f.Fd()))
if err != nil {
return err
}
relPath, err := filepath.Rel(root, path)
if err != nil {
return err
}
digests[relPath] = verity
return nil
}
err := filepath.WalkDir(root, walkFn)
return digests, err
}
func getComposefsBlob(dataDir string) string {
return filepath.Join(dataDir, "composefs.blob")
}
@ -156,7 +83,7 @@ func generateComposeFsBlob(verityDigests map[string]string, toc interface{}, com
return err
}
if err := enableVerity("manifest file", int(newFd.Fd())); err != nil && !errors.Is(err, unix.ENOTSUP) && !errors.Is(err, unix.ENOTTY) {
if err := fsverity.EnableVerity("manifest file", int(newFd.Fd())); err != nil && !errors.Is(err, unix.ENOTSUP) && !errors.Is(err, unix.ENOTTY) {
logrus.Warningf("%s", err)
}

View file

@ -1,24 +0,0 @@
//go:build !linux || !composefs || !cgo
// +build !linux !composefs !cgo
package overlay
import (
"fmt"
)
func composeFsSupported() bool {
return false
}
func generateComposeFsBlob(verityDigests map[string]string, toc interface{}, composefsDir string) error {
return fmt.Errorf("composefs is not supported")
}
func mountComposefsBlob(dataDir, mountPoint string) error {
return fmt.Errorf("composefs is not supported")
}
func enableVerityRecursive(path string) (map[string]string, error) {
return nil, fmt.Errorf("composefs is not supported")
}

View file

@ -82,7 +82,8 @@ const (
lowerFile = "lower"
maxDepth = 500
tocArtifact = "toc"
tocArtifact = "toc"
fsVerityDigestsArtifact = "fs-verity-digests"
// idLength represents the number of random characters
// which can be used to create the unique link identifier
@ -105,6 +106,7 @@ type overlayOptions struct {
mountOptions string
ignoreChownErrors bool
forceMask *os.FileMode
useComposefs bool
}
// Driver contains information about the home directory and the list of active mounts that are created using this driver.
@ -122,6 +124,7 @@ type Driver struct {
supportsDType bool
supportsVolatile *bool
usingMetacopy bool
usingComposefs bool
supportsIDMappedMounts *bool
}
@ -293,7 +296,7 @@ func isNetworkFileSystem(fsMagic graphdriver.FsMagic) bool {
// a bunch of network file systems...
case graphdriver.FsMagicNfsFs, graphdriver.FsMagicSmbFs, graphdriver.FsMagicAcfs,
graphdriver.FsMagicAfs, graphdriver.FsMagicCephFs, graphdriver.FsMagicCIFS,
graphdriver.FsMagicFHGFSFs, graphdriver.FsMagicGPFS, graphdriver.FsMagicIBRIX,
graphdriver.FsMagicGPFS, graphdriver.FsMagicIBRIX,
graphdriver.FsMagicKAFS, graphdriver.FsMagicLUSTRE, graphdriver.FsMagicNCP,
graphdriver.FsMagicNFSD, graphdriver.FsMagicOCFS2, graphdriver.FsMagicPANFS,
graphdriver.FsMagicPRLFS, graphdriver.FsMagicSMB2, graphdriver.FsMagicSNFS,
@ -307,16 +310,6 @@ func isNetworkFileSystem(fsMagic graphdriver.FsMagic) bool {
// If overlay filesystem is not supported on the host, a wrapped graphdriver.ErrNotSupported is returned as error.
// If an overlay filesystem is not supported over an existing filesystem then a wrapped graphdriver.ErrIncompatibleFS is returned.
func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {
// If custom --imagestore is selected never
// ditch the original graphRoot, instead add it as
// additionalImageStore so its images can still be
// read and used.
if options.ImageStore != "" {
graphRootAsAdditionalStore := fmt.Sprintf("AdditionalImageStore=%s", options.ImageStore)
options.DriverOptions = append(options.DriverOptions, graphRootAsAdditionalStore)
// complete base name with driver name included
options.ImageStore = filepath.Join(options.ImageStore, "overlay")
}
opts, err := parseOptions(options.DriverOptions)
if err != nil {
return nil, err
@ -387,6 +380,22 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
}
}
if opts.useComposefs {
if unshare.IsRootless() {
return nil, fmt.Errorf("composefs is not supported in user namespaces")
}
supportsDataOnly, err := supportsDataOnlyLayersCached(home, runhome)
if err != nil {
return nil, err
}
if !supportsDataOnly {
return nil, fmt.Errorf("composefs is not supported on this kernel: %w", graphdriver.ErrIncompatibleFS)
}
if _, err := getComposeFsHelper(); err != nil {
return nil, fmt.Errorf("composefs helper program not found: %w", err)
}
}
var usingMetacopy bool
var supportsDType bool
var supportsVolatile *bool
@ -448,6 +457,7 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
supportsDType: supportsDType,
usingMetacopy: usingMetacopy,
supportsVolatile: supportsVolatile,
usingComposefs: opts.useComposefs,
options: *opts,
}
@ -555,6 +565,12 @@ func parseOptions(options []string) (*overlayOptions, error) {
withReference: withReference,
})
}
case "use_composefs":
logrus.Debugf("overlay: use_composefs=%s", val)
o.useComposefs, err = strconv.ParseBool(val)
if err != nil {
return nil, err
}
case "mount_program":
logrus.Debugf("overlay: mount_program=%s", val)
if val != "" {
@ -782,7 +798,7 @@ func supportsOverlay(home string, homeMagic graphdriver.FsMagic, rootUID, rootGI
}
func (d *Driver) useNaiveDiff() bool {
if d.useComposeFs() {
if d.usingComposefs {
return true
}
@ -837,22 +853,15 @@ func (d *Driver) Status() [][2]string {
// Metadata returns meta data about the overlay driver such as
// LowerDir, UpperDir, WorkDir and MergeDir used to store data.
func (d *Driver) Metadata(id string) (map[string]string, error) {
dir, imagestore, _ := d.dir2(id)
dir := d.dir(id)
if _, err := os.Stat(dir); err != nil {
return nil, err
}
workDirBase := dir
if imagestore != "" {
if _, err := os.Stat(dir); err != nil {
return nil, err
}
workDirBase = imagestore
}
metadata := map[string]string{
"WorkDir": path.Join(workDirBase, "work"),
"MergedDir": path.Join(workDirBase, "merged"),
"UpperDir": path.Join(workDirBase, "diff"),
"WorkDir": path.Join(dir, "work"),
"MergedDir": path.Join(dir, "merged"),
"UpperDir": path.Join(dir, "diff"),
}
lowerDirs, err := d.getLowerDirs(id)
@ -870,7 +879,7 @@ func (d *Driver) Metadata(id string) (map[string]string, error) {
// is being shutdown. For now, we just have to unmount the bind mounted
// we had created.
func (d *Driver) Cleanup() error {
_ = os.RemoveAll(d.getStagingDir())
_ = os.RemoveAll(filepath.Join(d.home, stagingDir))
return mount.Unmount(d.home)
}
@ -966,8 +975,10 @@ func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) (retErr
return d.create(id, parent, opts, true)
}
func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disableQuota bool) (retErr error) {
dir, imageStore, _ := d.dir2(id)
func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, readOnly bool) (retErr error) {
dir, homedir, _ := d.dir2(id, readOnly)
disableQuota := readOnly
uidMaps := d.uidMaps
gidMaps := d.gidMaps
@ -978,7 +989,7 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
}
// Make the link directory if it does not exist
if err := idtools.MkdirAllAs(path.Join(d.home, linkDir), 0o755, 0, 0); err != nil {
if err := idtools.MkdirAllAs(path.Join(homedir, linkDir), 0o755, 0, 0); err != nil {
return err
}
@ -995,20 +1006,8 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
if err := idtools.MkdirAllAndChownNew(path.Dir(dir), 0o755, idPair); err != nil {
return err
}
workDirBase := dir
if imageStore != "" {
workDirBase = imageStore
if err := idtools.MkdirAllAndChownNew(path.Dir(imageStore), 0o755, idPair); err != nil {
return err
}
}
if parent != "" {
parentBase, parentImageStore, inAdditionalStore := d.dir2(parent)
// If parentBase path is additional image store, select the image contained in parentBase.
// See https://github.com/containers/podman/issues/19748
if parentImageStore != "" && !inAdditionalStore {
parentBase = parentImageStore
}
parentBase := d.dir(parent)
st, err := system.Stat(filepath.Join(parentBase, "diff"))
if err != nil {
return err
@ -1029,11 +1028,6 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
if err := idtools.MkdirAllAndChownNew(dir, 0o700, idPair); err != nil {
return err
}
if imageStore != "" {
if err := idtools.MkdirAllAndChownNew(imageStore, 0o700, idPair); err != nil {
return err
}
}
defer func() {
// Clean up on failure
@ -1041,11 +1035,6 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
if err2 := os.RemoveAll(dir); err2 != nil {
logrus.Errorf("While recovering from a failure creating a layer, error deleting %#v: %v", dir, err2)
}
if imageStore != "" {
if err2 := os.RemoveAll(workDirBase); err2 != nil {
logrus.Errorf("While recovering from a failure creating a layer, error deleting %#v: %v", workDirBase, err2)
}
}
}
}()
@ -1068,11 +1057,6 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
if err := d.quotaCtl.SetQuota(dir, quota); err != nil {
return err
}
if imageStore != "" {
if err := d.quotaCtl.SetQuota(imageStore, quota); err != nil {
return err
}
}
}
perms := defaultPerms
@ -1081,12 +1065,7 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
}
if parent != "" {
parentBase, parentImageStore, inAdditionalStore := d.dir2(parent)
// If parentBase path is additional image store, select the image contained in parentBase.
// See https://github.com/containers/podman/issues/19748
if parentImageStore != "" && !inAdditionalStore {
parentBase = parentImageStore
}
parentBase := d.dir(parent)
st, err := system.Stat(filepath.Join(parentBase, "diff"))
if err != nil {
return err
@ -1094,17 +1073,14 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
perms = os.FileMode(st.Mode())
}
if err := idtools.MkdirAs(path.Join(workDirBase, "diff"), perms, rootUID, rootGID); err != nil {
if err := idtools.MkdirAs(path.Join(dir, "diff"), perms, rootUID, rootGID); err != nil {
return err
}
lid := generateID(idLength)
linkBase := path.Join("..", id, "diff")
if imageStore != "" {
linkBase = path.Join(imageStore, "diff")
}
if err := os.Symlink(linkBase, path.Join(d.home, linkDir, lid)); err != nil {
if err := os.Symlink(linkBase, path.Join(homedir, linkDir, lid)); err != nil {
return err
}
@ -1113,10 +1089,10 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
return err
}
if err := idtools.MkdirAs(path.Join(workDirBase, "work"), 0o700, rootUID, rootGID); err != nil {
if err := idtools.MkdirAs(path.Join(dir, "work"), 0o700, rootUID, rootGID); err != nil {
return err
}
if err := idtools.MkdirAs(path.Join(workDirBase, "merged"), 0o700, rootUID, rootGID); err != nil {
if err := idtools.MkdirAs(path.Join(dir, "merged"), 0o700, rootUID, rootGID); err != nil {
return err
}
@ -1198,26 +1174,39 @@ func (d *Driver) getLower(parent string) (string, error) {
}
func (d *Driver) dir(id string) string {
p, _, _ := d.dir2(id)
p, _, _ := d.dir2(id, false)
return p
}
func (d *Driver) dir2(id string) (string, string, bool) {
newpath := path.Join(d.home, id)
imageStore := ""
func (d *Driver) getAllImageStores() []string {
additionalImageStores := d.AdditionalImageStores()
if d.imageStore != "" {
imageStore = path.Join(d.imageStore, id)
additionalImageStores = append([]string{d.imageStore}, additionalImageStores...)
}
return additionalImageStores
}
func (d *Driver) dir2(id string, useImageStore bool) (string, string, bool) {
var homedir string
if useImageStore && d.imageStore != "" {
homedir = path.Join(d.imageStore, d.name)
} else {
homedir = d.home
}
newpath := path.Join(homedir, id)
if _, err := os.Stat(newpath); err != nil {
for _, p := range d.AdditionalImageStores() {
for _, p := range d.getAllImageStores() {
l := path.Join(p, d.name, id)
_, err = os.Stat(l)
if err == nil {
return l, imageStore, true
return l, homedir, true
}
}
}
return newpath, imageStore, false
return newpath, homedir, false
}
func (d *Driver) getLowerDirs(id string) ([]string, error) {
@ -1427,14 +1416,11 @@ func (d *Driver) Get(id string, options graphdriver.MountOpts) (string, error) {
}
func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountOpts) (_ string, retErr error) {
dir, imageStore, inAdditionalStore := d.dir2(id)
dir, _, inAdditionalStore := d.dir2(id, false)
if _, err := os.Stat(dir); err != nil {
return "", err
}
workDirBase := dir
if imageStore != "" {
workDirBase = imageStore
}
readWrite := !inAdditionalStore
if !d.SupportsShifting() || options.DisableShifting {
@ -1539,7 +1525,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}()
composeFsLayers := []string{}
composeFsLayersDir := filepath.Join(workDirBase, "composefs-layers")
composeFsLayersDir := filepath.Join(dir, "composefs-layers")
maybeAddComposefsMount := func(lowerID string, i int, readWrite bool) (string, error) {
composefsBlob := d.getComposefsData(lowerID)
_, err = os.Stat(composefsBlob)
@ -1573,7 +1559,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
return dest, nil
}
diffDir := path.Join(workDirBase, "diff")
diffDir := path.Join(dir, "diff")
if dest, err := maybeAddComposefsMount(id, 0, readWrite); err != nil {
return "", err
@ -1591,7 +1577,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
lower := ""
newpath := path.Join(d.home, l)
if st, err := os.Stat(newpath); err != nil {
for _, p := range d.AdditionalImageStores() {
for _, p := range d.getAllImageStores() {
lower = path.Join(p, d.name, l)
if st2, err2 := os.Stat(lower); err2 == nil {
if !permsKnown {
@ -1659,21 +1645,27 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
optsList = append(optsList, "metacopy=on", "redirect_dir=on")
}
if len(absLowers) == 0 {
absLowers = append(absLowers, path.Join(dir, "empty"))
}
// user namespace requires this to move a directory from lower to upper.
rootUID, rootGID, err := idtools.GetRootUIDGID(options.UidMaps, options.GidMaps)
if err != nil {
return "", err
}
if err := idtools.MkdirAllAs(diffDir, perms, rootUID, rootGID); err != nil {
return "", err
if len(absLowers) == 0 {
absLowers = append(absLowers, path.Join(dir, "empty"))
}
mergedDir := path.Join(workDirBase, "merged")
if err := idtools.MkdirAllAs(diffDir, perms, rootUID, rootGID); err != nil {
if !inAdditionalStore {
return "", err
}
// if it is in an additional store, do not fail if the directory already exists
if _, err2 := os.Stat(diffDir); err2 != nil {
return "", err
}
}
mergedDir := path.Join(dir, "merged")
// Create the driver merged dir
if err := idtools.MkdirAs(mergedDir, 0o700, rootUID, rootGID); err != nil && !os.IsExist(err) {
return "", err
@ -1691,7 +1683,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}
}()
workdir := path.Join(workDirBase, "work")
workdir := path.Join(dir, "work")
if d.options.mountProgram == "" && unshare.IsRootless() {
optsList = append(optsList, "userxattr")
@ -1841,7 +1833,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
// Put unmounts the mount path created for the give id.
func (d *Driver) Put(id string) error {
dir := d.dir(id)
dir, _, inAdditionalStore := d.dir2(id, false)
if _, err := os.Stat(dir); err != nil {
return err
}
@ -1902,11 +1894,27 @@ func (d *Driver) Put(id string) error {
}
}
if err := unix.Rmdir(mountpoint); err != nil && !os.IsNotExist(err) {
logrus.Debugf("Failed to remove mountpoint %s overlay: %s - %v", id, mountpoint, err)
return fmt.Errorf("removing mount point %q: %w", mountpoint, err)
}
if !inAdditionalStore {
uid, gid := int(0), int(0)
fi, err := os.Stat(mountpoint)
if err != nil {
return err
}
if stat, ok := fi.Sys().(*syscall.Stat_t); ok {
uid, gid = int(stat.Uid), int(stat.Gid)
}
tmpMountpoint := path.Join(dir, "merged.1")
if err := idtools.MkdirAs(tmpMountpoint, 0o700, uid, gid); err != nil && !errors.Is(err, os.ErrExist) {
return err
}
// rename(2) can be used on an empty directory, as it is the mountpoint after umount, and it retains
// its atomic semantic. In this way the "merged" directory is never removed.
if err := unix.Rename(tmpMountpoint, mountpoint); err != nil {
logrus.Debugf("Failed to replace mountpoint %s overlay: %s - %v", id, mountpoint, err)
return fmt.Errorf("replacing mount point %q: %w", mountpoint, err)
}
}
return nil
}
@ -1994,14 +2002,18 @@ func (g *overlayFileGetter) Close() error {
return nil
}
func (d *Driver) getStagingDir() string {
return filepath.Join(d.home, stagingDir)
func (d *Driver) getStagingDir(id string) string {
_, homedir, _ := d.dir2(id, d.imageStore != "")
return filepath.Join(homedir, stagingDir)
}
// DiffGetter returns a FileGetCloser that can read files from the directory that
// contains files for the layer differences, either for this layer, or one of our
// lowers if we're just a template directory. Used for direct access for tar-split.
func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
if d.usingComposefs {
return nil, nil
}
p, err := d.getDiffPath(id)
if err != nil {
return nil, err
@ -2018,9 +2030,9 @@ func (d *Driver) CleanupStagingDirectory(stagingDirectory string) error {
return os.RemoveAll(stagingDirectory)
}
func (d *Driver) supportsDataOnlyLayers() (bool, error) {
func supportsDataOnlyLayersCached(home, runhome string) (bool, error) {
feature := "dataonly-layers"
overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(d.runhome, feature)
overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(runhome, feature)
if err == nil {
if overlayCacheResult {
logrus.Debugf("Cached value indicated that data-only layers for overlay are supported")
@ -2029,27 +2041,15 @@ func (d *Driver) supportsDataOnlyLayers() (bool, error) {
logrus.Debugf("Cached value indicated that data-only layers for overlay are not supported")
return false, errors.New(overlayCacheText)
}
supportsDataOnly, err := supportsDataOnlyLayers(d.home)
if err2 := cachedFeatureRecord(d.runhome, feature, supportsDataOnly, ""); err2 != nil {
supportsDataOnly, err := supportsDataOnlyLayers(home)
if err2 := cachedFeatureRecord(runhome, feature, supportsDataOnly, ""); err2 != nil {
return false, fmt.Errorf("recording overlay data-only layers support status: %w", err2)
}
return supportsDataOnly, err
}
func (d *Driver) useComposeFs() bool {
if !composeFsSupported() || unshare.IsRootless() {
return false
}
supportsDataOnlyLayers, err := d.supportsDataOnlyLayers()
if err != nil {
logrus.Debugf("Check for data-only layers failed with: %v", err)
return false
}
return supportsDataOnlyLayers
}
// ApplyDiff applies the changes in the new layer using the specified function
func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.ApplyDiffOpts, differ graphdriver.Differ) (output graphdriver.DriverWithDifferOutput, err error) {
func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.ApplyDiffWithDifferOpts, differ graphdriver.Differ) (output graphdriver.DriverWithDifferOutput, err error) {
var idMappings *idtools.IDMappings
if options != nil {
idMappings = options.Mappings
@ -2061,15 +2061,22 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App
var applyDir string
if id == "" {
err := os.MkdirAll(d.getStagingDir(), 0o700)
stagingDir := d.getStagingDir(id)
err := os.MkdirAll(stagingDir, 0o700)
if err != nil && !os.IsExist(err) {
return graphdriver.DriverWithDifferOutput{}, err
}
applyDir, err = os.MkdirTemp(d.getStagingDir(), "")
applyDir, err = os.MkdirTemp(stagingDir, "")
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
perms := defaultPerms
if d.options.forceMask != nil {
perms = *d.options.forceMask
}
if err := os.Chmod(applyDir, perms); err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
} else {
var err error
applyDir, err = d.getDiffPath(id)
@ -2083,8 +2090,9 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App
differOptions := graphdriver.DifferOptions{
Format: graphdriver.DifferOutputFormatDir,
}
if d.useComposeFs() {
if d.usingComposefs {
differOptions.Format = graphdriver.DifferOutputFormatFlat
differOptions.UseFsVerity = graphdriver.DifferFsVerityEnabled
}
out, err := differ.ApplyDiff(applyDir, &archive.TarOptions{
UIDMaps: idMappings.UIDs(),
@ -2100,33 +2108,42 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App
}
// ApplyDiffFromStagingDirectory applies the changes using the specified staging directory.
func (d *Driver) ApplyDiffFromStagingDirectory(id, parent, stagingDirectory string, diffOutput *graphdriver.DriverWithDifferOutput, options *graphdriver.ApplyDiffOpts) error {
if filepath.Dir(stagingDirectory) != d.getStagingDir() {
func (d *Driver) ApplyDiffFromStagingDirectory(id, parent string, diffOutput *graphdriver.DriverWithDifferOutput, options *graphdriver.ApplyDiffWithDifferOpts) error {
stagingDirectory := diffOutput.Target
if filepath.Dir(stagingDirectory) != d.getStagingDir(id) {
return fmt.Errorf("%q is not a staging directory", stagingDirectory)
}
if d.useComposeFs() {
// FIXME: move this logic into the differ so we don't have to open
// the file twice.
verityDigests, err := enableVerityRecursive(stagingDirectory)
if err != nil && !errors.Is(err, unix.ENOTSUP) && !errors.Is(err, unix.ENOTTY) {
logrus.Warningf("%s", err)
}
toc := diffOutput.Artifacts[tocArtifact]
if err := generateComposeFsBlob(verityDigests, toc, d.getComposefsData(id)); err != nil {
return err
}
}
diffPath, err := d.getDiffPath(id)
if err != nil {
return err
}
// If the current layer doesn't set the mode for the parent, override it with the parent layer's mode.
if d.options.forceMask == nil && diffOutput.RootDirMode == nil && parent != "" {
parentDiffPath, err := d.getDiffPath(parent)
if err != nil {
return err
}
parentSt, err := os.Stat(parentDiffPath)
if err != nil {
return err
}
if err := os.Chmod(stagingDirectory, parentSt.Mode()); err != nil {
return err
}
}
if d.usingComposefs {
toc := diffOutput.Artifacts[tocArtifact]
verityDigests := diffOutput.Artifacts[fsVerityDigestsArtifact].(map[string]string)
if err := generateComposeFsBlob(verityDigests, toc, d.getComposefsData(id)); err != nil {
return err
}
}
if err := os.RemoveAll(diffPath); err != nil && !os.IsNotExist(err) {
return err
}
diffOutput.UncompressedDigest = diffOutput.TOCDigest
return os.Rename(stagingDirectory, diffPath)
}
@ -2179,12 +2196,8 @@ func (d *Driver) getComposefsData(id string) string {
}
func (d *Driver) getDiffPath(id string) (string, error) {
dir, imagestore, _ := d.dir2(id)
base := dir
if imagestore != "" {
base = imagestore
}
return redirectDiffIfAdditionalLayer(path.Join(base, "diff"))
dir := d.dir(id)
return redirectDiffIfAdditionalLayer(path.Join(dir, "diff"))
}
func (d *Driver) getLowerDiffPaths(id string) ([]string, error) {
@ -2275,12 +2288,8 @@ func (d *Driver) AdditionalImageStores() []string {
// by toContainer to those specified by toHost.
func (d *Driver) UpdateLayerIDMap(id string, toContainer, toHost *idtools.IDMappings, mountLabel string) error {
var err error
dir, imagestore, _ := d.dir2(id)
base := dir
if imagestore != "" {
base = imagestore
}
diffDir := filepath.Join(base, "diff")
dir := d.dir(id)
diffDir := filepath.Join(dir, "diff")
rootUID, rootGID := 0, 0
if toHost != nil {

View file

@ -4,6 +4,7 @@
package overlay
import (
"fmt"
"path"
"github.com/containers/storage/pkg/directory"
@ -15,3 +16,15 @@ import (
func (d *Driver) ReadWriteDiskUsage(id string) (*directory.DiskUsage, error) {
return directory.Usage(path.Join(d.dir(id), "diff"))
}
func getComposeFsHelper() (string, error) {
return "", fmt.Errorf("composefs not supported on this build")
}
func mountComposefsBlob(dataDir, mountPoint string) error {
return fmt.Errorf("composefs not supported on this build")
}
func generateComposeFsBlob(verityDigests map[string]string, toc interface{}, composefsDir string) error {
return fmt.Errorf("composefs not supported on this build")
}

View file

@ -1,453 +1,5 @@
//go:build linux && !exclude_disk_quota && cgo
// +build linux,!exclude_disk_quota,cgo
//
// projectquota.go - implements XFS project quota controls
// for setting quota limits on a newly created directory.
// It currently supports the legacy XFS specific ioctls.
//
// TODO: use generic quota control ioctl FS_IOC_FS{GET,SET}XATTR
// for both xfs/ext4 for kernel version >= v4.5
//
package quota
/*
#include <stdlib.h>
#include <dirent.h>
#include <linux/fs.h>
#include <linux/quota.h>
#include <linux/dqblk_xfs.h>
#ifndef FS_XFLAG_PROJINHERIT
struct fsxattr {
__u32 fsx_xflags;
__u32 fsx_extsize;
__u32 fsx_nextents;
__u32 fsx_projid;
unsigned char fsx_pad[12];
};
#define FS_XFLAG_PROJINHERIT 0x00000200
#endif
#ifndef FS_IOC_FSGETXATTR
#define FS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr)
#endif
#ifndef FS_IOC_FSSETXATTR
#define FS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr)
#endif
#ifndef PRJQUOTA
#define PRJQUOTA 2
#endif
#ifndef FS_PROJ_QUOTA
#define FS_PROJ_QUOTA 2
#endif
#ifndef Q_XSETPQLIM
#define Q_XSETPQLIM QCMD(Q_XSETQLIM, PRJQUOTA)
#endif
#ifndef Q_XGETPQUOTA
#define Q_XGETPQUOTA QCMD(Q_XGETQUOTA, PRJQUOTA)
#endif
*/
import "C"
import (
"errors"
"fmt"
"math"
"os"
"path"
"path/filepath"
"sync"
"syscall"
"unsafe"
"github.com/containers/storage/pkg/directory"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
const projectIDsAllocatedPerQuotaHome = 10000
// BackingFsBlockDeviceLink is the name of a file that we place in
// the home directory of a driver that uses this package.
const BackingFsBlockDeviceLink = "backingFsBlockDev"
// Quota limit params - currently we only control blocks hard limit and inodes
type Quota struct {
Size uint64
Inodes uint64
}
// Control - Context to be used by storage driver (e.g. overlay)
// who wants to apply project quotas to container dirs
type Control struct {
backingFsBlockDev string
nextProjectID uint32
quotas *sync.Map
basePath string
}
// Attempt to generate a unigue projectid. Multiple directories
// per file system can have quota and they need a group of unique
// ids. This function attempts to allocate at least projectIDsAllocatedPerQuotaHome(10000)
// unique projectids, based on the inode of the basepath.
func generateUniqueProjectID(path string) (uint32, error) {
fileinfo, err := os.Stat(path)
if err != nil {
return 0, err
}
stat, ok := fileinfo.Sys().(*syscall.Stat_t)
if !ok {
return 0, fmt.Errorf("not a syscall.Stat_t %s", path)
}
projectID := projectIDsAllocatedPerQuotaHome + (stat.Ino*projectIDsAllocatedPerQuotaHome)%(math.MaxUint32-projectIDsAllocatedPerQuotaHome)
return uint32(projectID), nil
}
// NewControl - initialize project quota support.
// Test to make sure that quota can be set on a test dir and find
// the first project id to be used for the next container create.
//
// Returns nil (and error) if project quota is not supported.
//
// First get the project id of the basePath directory.
// This test will fail if the backing fs is not xfs.
//
// xfs_quota tool can be used to assign a project id to the driver home directory, e.g.:
// echo 100000:/var/lib/containers/storage/overlay >> /etc/projects
// echo 200000:/var/lib/containers/storage/volumes >> /etc/projects
// echo storage:100000 >> /etc/projid
// echo volumes:200000 >> /etc/projid
// xfs_quota -x -c 'project -s storage volumes' /<xfs mount point>
//
// In the example above, the storage directory project id will be used as a
// "start offset" and all containers will be assigned larger project ids
// (e.g. >= 100000). Then the volumes directory project id will be used as a
// "start offset" and all volumes will be assigned larger project ids
// (e.g. >= 200000).
// This is a way to prevent xfs_quota management from conflicting with
// containers/storage.
// Then try to create a test directory with the next project id and set a quota
// on it. If that works, continue to scan existing containers to map allocated
// project ids.
func NewControl(basePath string) (*Control, error) {
//
// Get project id of parent dir as minimal id to be used by driver
//
minProjectID, err := getProjectID(basePath)
if err != nil {
return nil, err
}
if minProjectID == 0 {
// Indicates the storage was never initialized
// Generate a unique range of Projectids for this basepath
minProjectID, err = generateUniqueProjectID(basePath)
if err != nil {
return nil, err
}
}
//
// create backing filesystem device node
//
backingFsBlockDev, err := makeBackingFsDev(basePath)
if err != nil {
return nil, err
}
//
// Test if filesystem supports project quotas by trying to set
// a quota on the first available project id
//
quota := Quota{
Size: 0,
Inodes: 0,
}
q := Control{
backingFsBlockDev: backingFsBlockDev,
nextProjectID: minProjectID + 1,
quotas: &sync.Map{},
basePath: basePath,
}
if err := q.setProjectQuota(minProjectID, quota); err != nil {
return nil, err
}
//
// get first project id to be used for next container
//
err = q.findNextProjectID()
if err != nil {
return nil, err
}
logrus.Debugf("NewControl(%s): nextProjectID = %d", basePath, q.nextProjectID)
return &q, nil
}
// SetQuota - assign a unique project id to directory and set the quota limits
// for that project id
func (q *Control) SetQuota(targetPath string, quota Quota) error {
var projectID uint32
value, ok := q.quotas.Load(targetPath)
if ok {
projectID, ok = value.(uint32)
}
if !ok {
projectID = q.nextProjectID
//
// assign project id to new container directory
//
err := setProjectID(targetPath, projectID)
if err != nil {
return err
}
q.quotas.Store(targetPath, projectID)
q.nextProjectID++
}
//
// set the quota limit for the container's project id
//
logrus.Debugf("SetQuota path=%s, size=%d, inodes=%d, projectID=%d", targetPath, quota.Size, quota.Inodes, projectID)
return q.setProjectQuota(projectID, quota)
}
// ClearQuota removes the map entry in the quotas map for targetPath.
// It does so to prevent the map leaking entries as directories are deleted.
func (q *Control) ClearQuota(targetPath string) {
q.quotas.Delete(targetPath)
}
// setProjectQuota - set the quota for project id on xfs block device
func (q *Control) setProjectQuota(projectID uint32, quota Quota) error {
var d C.fs_disk_quota_t
d.d_version = C.FS_DQUOT_VERSION
d.d_id = C.__u32(projectID)
d.d_flags = C.FS_PROJ_QUOTA
if quota.Size > 0 {
d.d_fieldmask = d.d_fieldmask | C.FS_DQ_BHARD | C.FS_DQ_BSOFT
d.d_blk_hardlimit = C.__u64(quota.Size / 512)
d.d_blk_softlimit = d.d_blk_hardlimit
}
if quota.Inodes > 0 {
d.d_fieldmask = d.d_fieldmask | C.FS_DQ_IHARD | C.FS_DQ_ISOFT
d.d_ino_hardlimit = C.__u64(quota.Inodes)
d.d_ino_softlimit = d.d_ino_hardlimit
}
cs := C.CString(q.backingFsBlockDev)
defer C.free(unsafe.Pointer(cs))
runQuotactl := func() syscall.Errno {
_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XSETPQLIM,
uintptr(unsafe.Pointer(cs)), uintptr(d.d_id),
uintptr(unsafe.Pointer(&d)), 0, 0)
return errno
}
errno := runQuotactl()
// If the backingFsBlockDev does not exist any more then try to recreate it.
if errors.Is(errno, unix.ENOENT) {
if _, err := makeBackingFsDev(q.basePath); err != nil {
return fmt.Errorf(
"failed to recreate missing backingFsBlockDev %s for projid %d: %w",
q.backingFsBlockDev, projectID, err,
)
}
if errno := runQuotactl(); errno != 0 {
return fmt.Errorf("failed to set quota limit for projid %d on %s after backingFsBlockDev recreation: %w",
projectID, q.backingFsBlockDev, errno)
}
} else if errno != 0 {
return fmt.Errorf("failed to set quota limit for projid %d on %s: %w",
projectID, q.backingFsBlockDev, errno)
}
return nil
}
// GetQuota - get the quota limits of a directory that was configured with SetQuota
func (q *Control) GetQuota(targetPath string, quota *Quota) error {
d, err := q.fsDiskQuotaFromPath(targetPath)
if err != nil {
return err
}
quota.Size = uint64(d.d_blk_hardlimit) * 512
quota.Inodes = uint64(d.d_ino_hardlimit)
return nil
}
// GetDiskUsage - get the current disk usage of a directory that was configured with SetQuota
func (q *Control) GetDiskUsage(targetPath string, usage *directory.DiskUsage) error {
d, err := q.fsDiskQuotaFromPath(targetPath)
if err != nil {
return err
}
usage.Size = int64(d.d_bcount) * 512
usage.InodeCount = int64(d.d_icount)
return nil
}
func (q *Control) fsDiskQuotaFromPath(targetPath string) (C.fs_disk_quota_t, error) {
var d C.fs_disk_quota_t
var projectID uint32
value, ok := q.quotas.Load(targetPath)
if ok {
projectID, ok = value.(uint32)
}
if !ok {
return d, fmt.Errorf("quota not found for path : %s", targetPath)
}
//
// get the quota limit for the container's project id
//
cs := C.CString(q.backingFsBlockDev)
defer C.free(unsafe.Pointer(cs))
_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XGETPQUOTA,
uintptr(unsafe.Pointer(cs)), uintptr(C.__u32(projectID)),
uintptr(unsafe.Pointer(&d)), 0, 0)
if errno != 0 {
return d, fmt.Errorf("failed to get quota limit for projid %d on %s: %w",
projectID, q.backingFsBlockDev, errno)
}
return d, nil
}
// getProjectID - get the project id of path on xfs
func getProjectID(targetPath string) (uint32, error) {
dir, err := openDir(targetPath)
if err != nil {
return 0, err
}
defer closeDir(dir)
var fsx C.struct_fsxattr
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {
return 0, fmt.Errorf("failed to get projid for %s: %w", targetPath, errno)
}
return uint32(fsx.fsx_projid), nil
}
// setProjectID - set the project id of path on xfs
func setProjectID(targetPath string, projectID uint32) error {
dir, err := openDir(targetPath)
if err != nil {
return err
}
defer closeDir(dir)
var fsx C.struct_fsxattr
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {
return fmt.Errorf("failed to get projid for %s: %w", targetPath, errno)
}
fsx.fsx_projid = C.__u32(projectID)
fsx.fsx_xflags |= C.FS_XFLAG_PROJINHERIT
_, _, errno = unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSSETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {
return fmt.Errorf("failed to set projid for %s: %w", targetPath, errno)
}
return nil
}
// findNextProjectID - find the next project id to be used for containers
// by scanning driver home directory to find used project ids
func (q *Control) findNextProjectID() error {
files, err := os.ReadDir(q.basePath)
if err != nil {
return fmt.Errorf("read directory failed : %s", q.basePath)
}
for _, file := range files {
if !file.IsDir() {
continue
}
path := filepath.Join(q.basePath, file.Name())
projid, err := getProjectID(path)
if err != nil {
return err
}
if projid > 0 {
q.quotas.Store(path, projid)
}
if q.nextProjectID <= projid {
q.nextProjectID = projid + 1
}
}
return nil
}
func free(p *C.char) {
C.free(unsafe.Pointer(p))
}
func openDir(path string) (*C.DIR, error) {
Cpath := C.CString(path)
defer free(Cpath)
dir, errno := C.opendir(Cpath)
if dir == nil {
return nil, fmt.Errorf("can't open dir %v: %w", Cpath, errno)
}
return dir, nil
}
func closeDir(dir *C.DIR) {
if dir != nil {
C.closedir(dir)
}
}
func getDirFd(dir *C.DIR) uintptr {
return uintptr(C.dirfd(dir))
}
// Get the backing block device of the driver home directory
// and create a block device node under the home directory
// to be used by quotactl commands
func makeBackingFsDev(home string) (string, error) {
var stat unix.Stat_t
if err := unix.Stat(home, &stat); err != nil {
return "", err
}
backingFsBlockDev := path.Join(home, BackingFsBlockDeviceLink)
backingFsBlockDevTmp := backingFsBlockDev + ".tmp"
// Re-create just in case someone copied the home directory over to a new device
if err := unix.Mknod(backingFsBlockDevTmp, unix.S_IFBLK|0o600, int(stat.Dev)); err != nil {
if !errors.Is(err, unix.EEXIST) {
return "", fmt.Errorf("failed to mknod %s: %w", backingFsBlockDevTmp, err)
}
// On EEXIST, try again after unlinking any potential leftover.
_ = unix.Unlink(backingFsBlockDevTmp)
if err := unix.Mknod(backingFsBlockDevTmp, unix.S_IFBLK|0o600, int(stat.Dev)); err != nil {
return "", fmt.Errorf("failed to mknod %s: %w", backingFsBlockDevTmp, err)
}
}
if err := unix.Rename(backingFsBlockDevTmp, backingFsBlockDev); err != nil {
return "", fmt.Errorf("failed to rename %s to %s: %w", backingFsBlockDevTmp, backingFsBlockDev, err)
}
return backingFsBlockDev, nil
}

View file

@ -0,0 +1,449 @@
//go:build linux && !exclude_disk_quota && cgo
// +build linux,!exclude_disk_quota,cgo
//
// projectquota.go - implements XFS project quota controls
// for setting quota limits on a newly created directory.
// It currently supports the legacy XFS specific ioctls.
//
// TODO: use generic quota control ioctl FS_IOC_FS{GET,SET}XATTR
// for both xfs/ext4 for kernel version >= v4.5
//
package quota
/*
#include <stdlib.h>
#include <dirent.h>
#include <linux/fs.h>
#include <linux/quota.h>
#include <linux/dqblk_xfs.h>
#ifndef FS_XFLAG_PROJINHERIT
struct fsxattr {
__u32 fsx_xflags;
__u32 fsx_extsize;
__u32 fsx_nextents;
__u32 fsx_projid;
unsigned char fsx_pad[12];
};
#define FS_XFLAG_PROJINHERIT 0x00000200
#endif
#ifndef FS_IOC_FSGETXATTR
#define FS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr)
#endif
#ifndef FS_IOC_FSSETXATTR
#define FS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr)
#endif
#ifndef PRJQUOTA
#define PRJQUOTA 2
#endif
#ifndef FS_PROJ_QUOTA
#define FS_PROJ_QUOTA 2
#endif
#ifndef Q_XSETPQLIM
#define Q_XSETPQLIM QCMD(Q_XSETQLIM, PRJQUOTA)
#endif
#ifndef Q_XGETPQUOTA
#define Q_XGETPQUOTA QCMD(Q_XGETQUOTA, PRJQUOTA)
#endif
*/
import "C"
import (
"errors"
"fmt"
"math"
"os"
"path"
"path/filepath"
"sync"
"syscall"
"unsafe"
"github.com/containers/storage/pkg/directory"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
const projectIDsAllocatedPerQuotaHome = 10000
// Quota limit params - currently we only control blocks hard limit and inodes
type Quota struct {
Size uint64
Inodes uint64
}
// Control - Context to be used by storage driver (e.g. overlay)
// who wants to apply project quotas to container dirs
type Control struct {
backingFsBlockDev string
nextProjectID uint32
quotas *sync.Map
basePath string
}
// Attempt to generate a unigue projectid. Multiple directories
// per file system can have quota and they need a group of unique
// ids. This function attempts to allocate at least projectIDsAllocatedPerQuotaHome(10000)
// unique projectids, based on the inode of the basepath.
func generateUniqueProjectID(path string) (uint32, error) {
fileinfo, err := os.Stat(path)
if err != nil {
return 0, err
}
stat, ok := fileinfo.Sys().(*syscall.Stat_t)
if !ok {
return 0, fmt.Errorf("not a syscall.Stat_t %s", path)
}
projectID := projectIDsAllocatedPerQuotaHome + (stat.Ino*projectIDsAllocatedPerQuotaHome)%(math.MaxUint32-projectIDsAllocatedPerQuotaHome)
return uint32(projectID), nil
}
// NewControl - initialize project quota support.
// Test to make sure that quota can be set on a test dir and find
// the first project id to be used for the next container create.
//
// Returns nil (and error) if project quota is not supported.
//
// First get the project id of the basePath directory.
// This test will fail if the backing fs is not xfs.
//
// xfs_quota tool can be used to assign a project id to the driver home directory, e.g.:
// echo 100000:/var/lib/containers/storage/overlay >> /etc/projects
// echo 200000:/var/lib/containers/storage/volumes >> /etc/projects
// echo storage:100000 >> /etc/projid
// echo volumes:200000 >> /etc/projid
// xfs_quota -x -c 'project -s storage volumes' /<xfs mount point>
//
// In the example above, the storage directory project id will be used as a
// "start offset" and all containers will be assigned larger project ids
// (e.g. >= 100000). Then the volumes directory project id will be used as a
// "start offset" and all volumes will be assigned larger project ids
// (e.g. >= 200000).
// This is a way to prevent xfs_quota management from conflicting with
// containers/storage.
// Then try to create a test directory with the next project id and set a quota
// on it. If that works, continue to scan existing containers to map allocated
// project ids.
func NewControl(basePath string) (*Control, error) {
//
// Get project id of parent dir as minimal id to be used by driver
//
minProjectID, err := getProjectID(basePath)
if err != nil {
return nil, err
}
if minProjectID == 0 {
// Indicates the storage was never initialized
// Generate a unique range of Projectids for this basepath
minProjectID, err = generateUniqueProjectID(basePath)
if err != nil {
return nil, err
}
}
//
// create backing filesystem device node
//
backingFsBlockDev, err := makeBackingFsDev(basePath)
if err != nil {
return nil, err
}
//
// Test if filesystem supports project quotas by trying to set
// a quota on the first available project id
//
quota := Quota{
Size: 0,
Inodes: 0,
}
q := Control{
backingFsBlockDev: backingFsBlockDev,
nextProjectID: minProjectID + 1,
quotas: &sync.Map{},
basePath: basePath,
}
if err := q.setProjectQuota(minProjectID, quota); err != nil {
return nil, err
}
//
// get first project id to be used for next container
//
err = q.findNextProjectID()
if err != nil {
return nil, err
}
logrus.Debugf("NewControl(%s): nextProjectID = %d", basePath, q.nextProjectID)
return &q, nil
}
// SetQuota - assign a unique project id to directory and set the quota limits
// for that project id
func (q *Control) SetQuota(targetPath string, quota Quota) error {
var projectID uint32
value, ok := q.quotas.Load(targetPath)
if ok {
projectID, ok = value.(uint32)
}
if !ok {
projectID = q.nextProjectID
//
// assign project id to new container directory
//
err := setProjectID(targetPath, projectID)
if err != nil {
return err
}
q.quotas.Store(targetPath, projectID)
q.nextProjectID++
}
//
// set the quota limit for the container's project id
//
logrus.Debugf("SetQuota path=%s, size=%d, inodes=%d, projectID=%d", targetPath, quota.Size, quota.Inodes, projectID)
return q.setProjectQuota(projectID, quota)
}
// ClearQuota removes the map entry in the quotas map for targetPath.
// It does so to prevent the map leaking entries as directories are deleted.
func (q *Control) ClearQuota(targetPath string) {
q.quotas.Delete(targetPath)
}
// setProjectQuota - set the quota for project id on xfs block device
func (q *Control) setProjectQuota(projectID uint32, quota Quota) error {
var d C.fs_disk_quota_t
d.d_version = C.FS_DQUOT_VERSION
d.d_id = C.__u32(projectID)
d.d_flags = C.FS_PROJ_QUOTA
if quota.Size > 0 {
d.d_fieldmask = d.d_fieldmask | C.FS_DQ_BHARD | C.FS_DQ_BSOFT
d.d_blk_hardlimit = C.__u64(quota.Size / 512)
d.d_blk_softlimit = d.d_blk_hardlimit
}
if quota.Inodes > 0 {
d.d_fieldmask = d.d_fieldmask | C.FS_DQ_IHARD | C.FS_DQ_ISOFT
d.d_ino_hardlimit = C.__u64(quota.Inodes)
d.d_ino_softlimit = d.d_ino_hardlimit
}
cs := C.CString(q.backingFsBlockDev)
defer C.free(unsafe.Pointer(cs))
runQuotactl := func() syscall.Errno {
_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XSETPQLIM,
uintptr(unsafe.Pointer(cs)), uintptr(d.d_id),
uintptr(unsafe.Pointer(&d)), 0, 0)
return errno
}
errno := runQuotactl()
// If the backingFsBlockDev does not exist any more then try to recreate it.
if errors.Is(errno, unix.ENOENT) {
if _, err := makeBackingFsDev(q.basePath); err != nil {
return fmt.Errorf(
"failed to recreate missing backingFsBlockDev %s for projid %d: %w",
q.backingFsBlockDev, projectID, err,
)
}
if errno := runQuotactl(); errno != 0 {
return fmt.Errorf("failed to set quota limit for projid %d on %s after backingFsBlockDev recreation: %w",
projectID, q.backingFsBlockDev, errno)
}
} else if errno != 0 {
return fmt.Errorf("failed to set quota limit for projid %d on %s: %w",
projectID, q.backingFsBlockDev, errno)
}
return nil
}
// GetQuota - get the quota limits of a directory that was configured with SetQuota
func (q *Control) GetQuota(targetPath string, quota *Quota) error {
d, err := q.fsDiskQuotaFromPath(targetPath)
if err != nil {
return err
}
quota.Size = uint64(d.d_blk_hardlimit) * 512
quota.Inodes = uint64(d.d_ino_hardlimit)
return nil
}
// GetDiskUsage - get the current disk usage of a directory that was configured with SetQuota
func (q *Control) GetDiskUsage(targetPath string, usage *directory.DiskUsage) error {
d, err := q.fsDiskQuotaFromPath(targetPath)
if err != nil {
return err
}
usage.Size = int64(d.d_bcount) * 512
usage.InodeCount = int64(d.d_icount)
return nil
}
func (q *Control) fsDiskQuotaFromPath(targetPath string) (C.fs_disk_quota_t, error) {
var d C.fs_disk_quota_t
var projectID uint32
value, ok := q.quotas.Load(targetPath)
if ok {
projectID, ok = value.(uint32)
}
if !ok {
return d, fmt.Errorf("quota not found for path : %s", targetPath)
}
//
// get the quota limit for the container's project id
//
cs := C.CString(q.backingFsBlockDev)
defer C.free(unsafe.Pointer(cs))
_, _, errno := unix.Syscall6(unix.SYS_QUOTACTL, C.Q_XGETPQUOTA,
uintptr(unsafe.Pointer(cs)), uintptr(C.__u32(projectID)),
uintptr(unsafe.Pointer(&d)), 0, 0)
if errno != 0 {
return d, fmt.Errorf("failed to get quota limit for projid %d on %s: %w",
projectID, q.backingFsBlockDev, errno)
}
return d, nil
}
// getProjectID - get the project id of path on xfs
func getProjectID(targetPath string) (uint32, error) {
dir, err := openDir(targetPath)
if err != nil {
return 0, err
}
defer closeDir(dir)
var fsx C.struct_fsxattr
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {
return 0, fmt.Errorf("failed to get projid for %s: %w", targetPath, errno)
}
return uint32(fsx.fsx_projid), nil
}
// setProjectID - set the project id of path on xfs
func setProjectID(targetPath string, projectID uint32) error {
dir, err := openDir(targetPath)
if err != nil {
return err
}
defer closeDir(dir)
var fsx C.struct_fsxattr
_, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSGETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {
return fmt.Errorf("failed to get projid for %s: %w", targetPath, errno)
}
fsx.fsx_projid = C.__u32(projectID)
fsx.fsx_xflags |= C.FS_XFLAG_PROJINHERIT
_, _, errno = unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSSETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {
return fmt.Errorf("failed to set projid for %s: %w", targetPath, errno)
}
return nil
}
// findNextProjectID - find the next project id to be used for containers
// by scanning driver home directory to find used project ids
func (q *Control) findNextProjectID() error {
files, err := os.ReadDir(q.basePath)
if err != nil {
return fmt.Errorf("read directory failed : %s", q.basePath)
}
for _, file := range files {
if !file.IsDir() {
continue
}
path := filepath.Join(q.basePath, file.Name())
projid, err := getProjectID(path)
if err != nil {
return err
}
if projid > 0 {
q.quotas.Store(path, projid)
}
if q.nextProjectID <= projid {
q.nextProjectID = projid + 1
}
}
return nil
}
func free(p *C.char) {
C.free(unsafe.Pointer(p))
}
func openDir(path string) (*C.DIR, error) {
Cpath := C.CString(path)
defer free(Cpath)
dir, errno := C.opendir(Cpath)
if dir == nil {
return nil, fmt.Errorf("can't open dir %v: %w", Cpath, errno)
}
return dir, nil
}
func closeDir(dir *C.DIR) {
if dir != nil {
C.closedir(dir)
}
}
func getDirFd(dir *C.DIR) uintptr {
return uintptr(C.dirfd(dir))
}
// Get the backing block device of the driver home directory
// and create a block device node under the home directory
// to be used by quotactl commands
func makeBackingFsDev(home string) (string, error) {
var stat unix.Stat_t
if err := unix.Stat(home, &stat); err != nil {
return "", err
}
backingFsBlockDev := path.Join(home, BackingFsBlockDeviceLink)
backingFsBlockDevTmp := backingFsBlockDev + ".tmp"
// Re-create just in case someone copied the home directory over to a new device
if err := unix.Mknod(backingFsBlockDevTmp, unix.S_IFBLK|0o600, int(stat.Dev)); err != nil {
if !errors.Is(err, unix.EEXIST) {
return "", fmt.Errorf("failed to mknod %s: %w", backingFsBlockDevTmp, err)
}
// On EEXIST, try again after unlinking any potential leftover.
_ = unix.Unlink(backingFsBlockDevTmp)
if err := unix.Mknod(backingFsBlockDevTmp, unix.S_IFBLK|0o600, int(stat.Dev)); err != nil {
return "", fmt.Errorf("failed to mknod %s: %w", backingFsBlockDevTmp, err)
}
}
if err := unix.Rename(backingFsBlockDevTmp, backingFsBlockDev); err != nil {
return "", fmt.Errorf("failed to rename %s to %s: %w", backingFsBlockDevTmp, backingFsBlockDev, err)
}
return backingFsBlockDev, nil
}

View file

@ -31,8 +31,9 @@ func init() {
func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) {
d := &Driver{
name: "vfs",
homes: []string{home},
home: home,
idMappings: idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps),
imageStore: options.ImageStore,
}
rootIDs := d.idMappings.RootPair()
@ -47,7 +48,7 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
key = strings.ToLower(key)
switch key {
case "vfs.imagestore", ".imagestore":
d.homes = append(d.homes, strings.Split(val, ",")...)
d.additionalHomes = append(d.additionalHomes, strings.Split(val, ",")...)
continue
case "vfs.mountopt":
return nil, fmt.Errorf("vfs driver does not support mount options")
@ -62,12 +63,7 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
return nil, fmt.Errorf("vfs driver does not support %s options", key)
}
}
// If --imagestore is provided, lets add writable graphRoot
// to vfs's additional image store, as it is done for
// `overlay` driver.
if options.ImageStore != "" {
d.homes = append(d.homes, options.ImageStore)
}
d.updater = graphdriver.NewNaiveLayerIDMapUpdater(d)
d.naiveDiff = graphdriver.NewNaiveDiffDriver(d, d.updater)
@ -80,11 +76,13 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
// Driver must be wrapped in NaiveDiffDriver to be used as a graphdriver.Driver
type Driver struct {
name string
homes []string
home string
additionalHomes []string
idMappings *idtools.IDMappings
ignoreChownErrors bool
naiveDiff graphdriver.DiffDriver
updater graphdriver.LayerIDMapUpdater
imageStore string
}
func (d *Driver) String() string {
@ -158,7 +156,7 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, ro bool
idMappings = opts.IDMappings
}
dir := d.dir(id)
dir := d.dir2(id, ro)
rootIDs := idMappings.RootPair()
if err := idtools.MkdirAllAndChown(filepath.Dir(dir), 0o700, rootIDs); err != nil {
return err
@ -204,18 +202,32 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, ro bool
return nil
}
func (d *Driver) dir(id string) string {
for i, home := range d.homes {
if i > 0 {
home = filepath.Join(home, d.String())
func (d *Driver) dir2(id string, useImageStore bool) string {
var homedir string
if useImageStore && d.imageStore != "" {
homedir = filepath.Join(d.imageStore, d.String(), "dir", filepath.Base(id))
} else {
homedir = filepath.Join(d.home, "dir", filepath.Base(id))
}
if _, err := os.Stat(homedir); err != nil {
additionalHomes := d.additionalHomes[:]
if d.imageStore != "" {
additionalHomes = append(additionalHomes, d.imageStore)
}
candidate := filepath.Join(home, "dir", filepath.Base(id))
fi, err := os.Stat(candidate)
if err == nil && fi.IsDir() {
return candidate
for _, home := range additionalHomes {
candidate := filepath.Join(home, d.String(), "dir", filepath.Base(id))
fi, err := os.Stat(candidate)
if err == nil && fi.IsDir() {
return candidate
}
}
}
return filepath.Join(d.homes[0], "dir", filepath.Base(id))
return homedir
}
func (d *Driver) dir(id string) string {
return d.dir2(id, false)
}
// Remove deletes the content from the directory for a given id.
@ -263,7 +275,7 @@ func (d *Driver) Exists(id string) bool {
// List layers (not including additional image stores)
func (d *Driver) ListLayers() ([]string, error) {
entries, err := os.ReadDir(filepath.Join(d.homes[0], "dir"))
entries, err := os.ReadDir(filepath.Join(d.home, "dir"))
if err != nil {
return nil, err
}
@ -285,8 +297,8 @@ func (d *Driver) ListLayers() ([]string, error) {
// AdditionalImageStores returns additional image stores supported by the driver
func (d *Driver) AdditionalImageStores() []string {
if len(d.homes) > 1 {
return d.homes[1:]
if len(d.additionalHomes) > 0 {
return d.additionalHomes
}
return nil
}

View file

@ -126,6 +126,13 @@ type Layer struct {
// as a DiffID.
UncompressedDigest digest.Digest `json:"diff-digest,omitempty"`
// TOCDigest represents the digest of the Table of Contents (TOC) of the blob.
// This digest is utilized when the UncompressedDigest is not
// validated during the partial image pull process, but the
// TOC itself is validated.
// It serves as an alternative reference under these specific conditions.
TOCDigest digest.Digest `json:"toc-digest,omitempty"`
// UncompressedSize is the length of the blob that was last passed to
// ApplyDiff() or create(), after we decompressed it. If
// UncompressedDigest is not set, this should be treated as if it were
@ -174,6 +181,13 @@ type DiffOptions struct {
Compression *archive.Compression
}
// stagedLayerOptions are the options passed to .create to populate a staged
// layer
type stagedLayerOptions struct {
DiffOutput *drivers.DriverWithDifferOutput
DiffOptions *drivers.ApplyDiffWithDifferOpts
}
// roLayerStore wraps a graph driver, adding the ability to refer to layers by
// name, and keeping track of parent-child relationships, along with a list of
// all known layers.
@ -228,6 +242,10 @@ type roLayerStore interface {
// specified uncompressed digest value recorded for them.
LayersByUncompressedDigest(d digest.Digest) ([]Layer, error)
// LayersByTOCDigest returns a slice of the layers with the
// specified uncompressed digest value recorded for them.
LayersByTOCDigest(d digest.Digest) ([]Layer, error)
// Layers returns a slice of the known layers.
Layers() ([]Layer, error)
}
@ -256,7 +274,7 @@ type rwLayerStore interface {
// underlying drivers do not themselves distinguish between writeable
// and read-only layers. Returns the new layer structure and the size of the
// diff which was applied to its parent to initialize its contents.
create(id string, parent *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, diff io.Reader) (*Layer, int64, error)
create(id string, parent *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, diff io.Reader, slo *stagedLayerOptions) (*Layer, int64, error)
// updateNames modifies names associated with a layer based on (op, names).
updateNames(id string, names []string, op updateNameOperation) error
@ -296,13 +314,13 @@ type rwLayerStore interface {
// ApplyDiffWithDiffer applies the changes through the differ callback function.
// If to is the empty string, then a staging directory is created by the driver.
ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error)
ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffWithDifferOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error)
// CleanupStagingDirectory cleanups the staging directory. It can be used to cleanup the staging directory on errors
CleanupStagingDirectory(stagingDirectory string) error
// ApplyDiffFromStagingDirectory uses stagingDirectory to create the diff.
ApplyDiffFromStagingDirectory(id, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffOpts) error
// applyDiffFromStagingDirectory uses diffOutput.Target to create the diff.
applyDiffFromStagingDirectory(id string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffWithDifferOpts) error
// DifferTarget gets the location where files are stored for the layer.
DifferTarget(id string) (string, error)
@ -316,10 +334,71 @@ type rwLayerStore interface {
GarbageCollect() error
}
type multipleLockFile struct {
lockfiles []*lockfile.LockFile
}
func (l multipleLockFile) Lock() {
for _, lock := range l.lockfiles {
lock.Lock()
}
}
func (l multipleLockFile) RLock() {
for _, lock := range l.lockfiles {
lock.RLock()
}
}
func (l multipleLockFile) Unlock() {
for _, lock := range l.lockfiles {
lock.Unlock()
}
}
func (l multipleLockFile) ModifiedSince(lastWrite lockfile.LastWrite) (lockfile.LastWrite, bool, error) {
// Look up only the first lockfile, since this is the value returned by RecordWrite().
return l.lockfiles[0].ModifiedSince(lastWrite)
}
func (l multipleLockFile) AssertLockedForWriting() {
for _, lock := range l.lockfiles {
lock.AssertLockedForWriting()
}
}
func (l multipleLockFile) GetLastWrite() (lockfile.LastWrite, error) {
return l.lockfiles[0].GetLastWrite()
}
func (l multipleLockFile) RecordWrite() (lockfile.LastWrite, error) {
var lastWrite *lockfile.LastWrite
for _, lock := range l.lockfiles {
lw, err := lock.RecordWrite()
if err != nil {
return lw, err
}
// Return the first value we get so we know that
// all the locks have a write time >= to this one.
if lastWrite == nil {
lastWrite = &lw
}
}
return *lastWrite, nil
}
func (l multipleLockFile) IsReadWrite() bool {
return l.lockfiles[0].IsReadWrite()
}
func newMultipleLockFile(l ...*lockfile.LockFile) *multipleLockFile {
return &multipleLockFile{lockfiles: l}
}
type layerStore struct {
// The following fields are only set when constructing layerStore, and must never be modified afterwards.
// They are safe to access without any other locking.
lockfile *lockfile.LockFile // lockfile.IsReadWrite can be used to distinguish between read-write and read-only layer stores.
lockfile *multipleLockFile // lockfile.IsReadWrite can be used to distinguish between read-write and read-only layer stores.
mountsLockfile *lockfile.LockFile // Can _only_ be obtained with inProcessLock held.
rundir string
jsonPath [numLayerLocationIndex]string
@ -337,6 +416,7 @@ type layerStore struct {
bymount map[string]*Layer
bycompressedsum map[digest.Digest][]string
byuncompressedsum map[digest.Digest][]string
bytocsum map[digest.Digest][]string
layerspathsModified [numLayerLocationIndex]time.Time
// FIXME: This field is only set when constructing layerStore, but locking rules of the driver
@ -366,6 +446,7 @@ func copyLayer(l *Layer) *Layer {
CompressedSize: l.CompressedSize,
UncompressedDigest: l.UncompressedDigest,
UncompressedSize: l.UncompressedSize,
TOCDigest: l.TOCDigest,
CompressionType: l.CompressionType,
ReadOnly: l.ReadOnly,
volatileStore: l.volatileStore,
@ -745,6 +826,7 @@ func (r *layerStore) load(lockedForWriting bool) (bool, error) {
names := make(map[string]*Layer)
compressedsums := make(map[digest.Digest][]string)
uncompressedsums := make(map[digest.Digest][]string)
tocsums := make(map[digest.Digest][]string)
var errorToResolveBySaving error // == nil; if there are multiple errors, this is one of them.
if r.lockfile.IsReadWrite() {
selinux.ClearLabels()
@ -765,6 +847,9 @@ func (r *layerStore) load(lockedForWriting bool) (bool, error) {
if layer.UncompressedDigest != "" {
uncompressedsums[layer.UncompressedDigest] = append(uncompressedsums[layer.UncompressedDigest], layer.ID)
}
if layer.TOCDigest != "" {
tocsums[layer.TOCDigest] = append(tocsums[layer.TOCDigest], layer.ID)
}
if layer.MountLabel != "" {
selinux.ReserveLabel(layer.MountLabel)
}
@ -792,6 +877,7 @@ func (r *layerStore) load(lockedForWriting bool) (bool, error) {
r.byname = names
r.bycompressedsum = compressedsums
r.byuncompressedsum = uncompressedsums
r.bytocsum = tocsums
// Load and merge information about which layers are mounted, and where.
if r.lockfile.IsReadWrite() {
@ -998,22 +1084,37 @@ func (r *layerStore) saveMounts() error {
return r.loadMounts()
}
func (s *store) newLayerStore(rundir string, layerdir string, driver drivers.Driver, transient bool) (rwLayerStore, error) {
func (s *store) newLayerStore(rundir, layerdir, imagedir string, driver drivers.Driver, transient bool) (rwLayerStore, error) {
if err := os.MkdirAll(rundir, 0o700); err != nil {
return nil, err
}
if err := os.MkdirAll(layerdir, 0o700); err != nil {
return nil, err
}
if imagedir != "" {
if err := os.MkdirAll(imagedir, 0o700); err != nil {
return nil, err
}
}
// Note: While the containers.lock file is in rundir for transient stores
// we don't want to do this here, because the non-transient layers in
// layers.json might be used externally as a read-only layer (using e.g.
// additionalimagestores), and that would look for the lockfile in the
// same directory
var lockFiles []*lockfile.LockFile
lockFile, err := lockfile.GetLockFile(filepath.Join(layerdir, "layers.lock"))
if err != nil {
return nil, err
}
lockFiles = append(lockFiles, lockFile)
if imagedir != "" {
lockFile, err := lockfile.GetLockFile(filepath.Join(imagedir, "layers.lock"))
if err != nil {
return nil, err
}
lockFiles = append(lockFiles, lockFile)
}
mountsLockfile, err := lockfile.GetLockFile(filepath.Join(rundir, "mountpoints.lock"))
if err != nil {
return nil, err
@ -1023,7 +1124,7 @@ func (s *store) newLayerStore(rundir string, layerdir string, driver drivers.Dri
volatileDir = rundir
}
rlstore := layerStore{
lockfile: lockFile,
lockfile: newMultipleLockFile(lockFiles...),
mountsLockfile: mountsLockfile,
rundir: rundir,
jsonPath: [numLayerLocationIndex]string{
@ -1060,7 +1161,7 @@ func newROLayerStore(rundir string, layerdir string, driver drivers.Driver) (roL
return nil, err
}
rlstore := layerStore{
lockfile: lockfile,
lockfile: newMultipleLockFile(lockfile),
mountsLockfile: nil,
rundir: rundir,
jsonPath: [numLayerLocationIndex]string{
@ -1112,7 +1213,7 @@ func (r *layerStore) Size(name string) (int64, error) {
// We use the presence of a non-empty digest as an indicator that the size value was intentionally set, and that
// a zero value is not just present because it was never set to anything else (which can happen if the layer was
// created by a version of this library that didn't keep track of digest and size information).
if layer.UncompressedDigest != "" {
if layer.TOCDigest != "" || layer.UncompressedDigest != "" {
return layer.UncompressedSize, nil
}
return -1, nil
@ -1201,6 +1302,9 @@ func (r *layerStore) PutAdditionalLayer(id string, parentLayer *Layer, names []s
if layer.UncompressedDigest != "" {
r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID)
}
if layer.TOCDigest != "" {
r.bytocsum[layer.TOCDigest] = append(r.bytocsum[layer.TOCDigest], layer.ID)
}
if err := r.saveFor(layer); err != nil {
if e := r.Delete(layer.ID); e != nil {
logrus.Errorf("While recovering from a failure to save layers, error deleting layer %#v: %v", id, e)
@ -1211,7 +1315,7 @@ func (r *layerStore) PutAdditionalLayer(id string, parentLayer *Layer, names []s
}
// Requires startWriting.
func (r *layerStore) create(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, diff io.Reader) (layer *Layer, size int64, err error) {
func (r *layerStore) create(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, diff io.Reader, slo *stagedLayerOptions) (layer *Layer, size int64, err error) {
if moreOptions == nil {
moreOptions = &LayerOptions{}
}
@ -1251,6 +1355,7 @@ func (r *layerStore) create(id string, parentLayer *Layer, names []string, mount
templateCompressedDigest digest.Digest
templateCompressedSize int64
templateUncompressedDigest digest.Digest
templateTOCDigest digest.Digest
templateUncompressedSize int64
templateCompressionType archive.Compression
templateUIDs, templateGIDs []uint32
@ -1263,6 +1368,7 @@ func (r *layerStore) create(id string, parentLayer *Layer, names []string, mount
}
templateMetadata = templateLayer.Metadata
templateIDMappings = idtools.NewIDMappingsFromMaps(templateLayer.UIDMap, templateLayer.GIDMap)
templateTOCDigest = templateLayer.TOCDigest
templateCompressedDigest, templateCompressedSize = templateLayer.CompressedDigest, templateLayer.CompressedSize
templateUncompressedDigest, templateUncompressedSize = templateLayer.UncompressedDigest, templateLayer.UncompressedSize
templateCompressionType = templateLayer.CompressionType
@ -1291,6 +1397,7 @@ func (r *layerStore) create(id string, parentLayer *Layer, names []string, mount
CompressedDigest: templateCompressedDigest,
CompressedSize: templateCompressedSize,
UncompressedDigest: templateUncompressedDigest,
TOCDigest: templateTOCDigest,
UncompressedSize: templateUncompressedSize,
CompressionType: templateCompressionType,
UIDs: templateUIDs,
@ -1402,6 +1509,11 @@ func (r *layerStore) create(id string, parentLayer *Layer, names []string, mount
cleanupFailureContext = "applying layer diff"
return nil, -1, err
}
} else if slo != nil {
if err := r.applyDiffFromStagingDirectory(layer.ID, slo.DiffOutput, slo.DiffOptions); err != nil {
cleanupFailureContext = "applying staged directory diff"
return nil, -1, err
}
} else {
// applyDiffWithOptions() would have updated r.bycompressedsum
// and r.byuncompressedsum for us, but if we used a template
@ -1413,6 +1525,9 @@ func (r *layerStore) create(id string, parentLayer *Layer, names []string, mount
if layer.UncompressedDigest != "" {
r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID)
}
if layer.TOCDigest != "" {
r.bytocsum[layer.TOCDigest] = append(r.bytocsum[layer.TOCDigest], layer.ID)
}
}
delete(layer.Flags, incompleteFlag)
@ -2007,9 +2122,16 @@ func (s *simpleGetCloser) Close() error {
// LOCKING BUG: See the comments in layerStore.Diff
func (r *layerStore) newFileGetter(id string) (drivers.FileGetCloser, error) {
if getter, ok := r.driver.(drivers.DiffGetterDriver); ok {
return getter.DiffGetter(id)
fgc, err := getter.DiffGetter(id)
if err != nil {
return nil, err
}
if fgc != nil {
return fgc, nil
}
}
path, err := r.Mount(id, drivers.MountOpts{})
path, err := r.Mount(id, drivers.MountOpts{Options: []string{"ro"}})
if err != nil {
return nil, err
}
@ -2197,6 +2319,25 @@ func (r *layerStore) DiffSize(from, to string) (size int64, err error) {
return r.driver.DiffSize(to, r.layerMappings(toLayer), from, r.layerMappings(fromLayer), toLayer.MountLabel)
}
func updateDigestMap(m *map[digest.Digest][]string, oldvalue, newvalue digest.Digest, id string) {
var newList []string
if oldvalue != "" {
for _, value := range (*m)[oldvalue] {
if value != id {
newList = append(newList, value)
}
}
if len(newList) > 0 {
(*m)[oldvalue] = newList
} else {
delete(*m, oldvalue)
}
}
if newvalue != "" {
(*m)[newvalue] = append((*m)[newvalue], id)
}
}
// Requires startWriting.
func (r *layerStore) ApplyDiff(to string, diff io.Reader) (size int64, err error) {
return r.applyDiffWithOptions(to, nil, diff)
@ -2233,7 +2374,7 @@ func (r *layerStore) applyDiffWithOptions(to string, layerOptions *LayerOptions,
if layerOptions != nil && layerOptions.UncompressedDigest != "" &&
layerOptions.UncompressedDigest.Algorithm() == digest.Canonical {
uncompressedDigest = layerOptions.UncompressedDigest
} else {
} else if compression != archive.Uncompressed {
uncompressedDigester = digest.Canonical.Digester()
}
@ -2312,28 +2453,17 @@ func (r *layerStore) applyDiffWithOptions(to string, layerOptions *LayerOptions,
if uncompressedDigester != nil {
uncompressedDigest = uncompressedDigester.Digest()
}
updateDigestMap := func(m *map[digest.Digest][]string, oldvalue, newvalue digest.Digest, id string) {
var newList []string
if oldvalue != "" {
for _, value := range (*m)[oldvalue] {
if value != id {
newList = append(newList, value)
}
}
if len(newList) > 0 {
(*m)[oldvalue] = newList
} else {
delete(*m, oldvalue)
}
}
if newvalue != "" {
(*m)[newvalue] = append((*m)[newvalue], id)
}
if uncompressedDigest == "" && compression == archive.Uncompressed {
uncompressedDigest = compressedDigest
}
updateDigestMap(&r.bycompressedsum, layer.CompressedDigest, compressedDigest, layer.ID)
layer.CompressedDigest = compressedDigest
layer.CompressedSize = compressedCounter.Count
if layerOptions != nil && layerOptions.OriginalDigest != "" && layerOptions.OriginalSize != nil {
layer.CompressedSize = *layerOptions.OriginalSize
} else {
layer.CompressedSize = compressedCounter.Count
}
updateDigestMap(&r.byuncompressedsum, layer.UncompressedDigest, uncompressedDigest, layer.ID)
layer.UncompressedDigest = uncompressedDigest
layer.UncompressedSize = uncompressedCounter.Count
@ -2372,7 +2502,7 @@ func (r *layerStore) DifferTarget(id string) (string, error) {
}
// Requires startWriting.
func (r *layerStore) ApplyDiffFromStagingDirectory(id, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffOpts) error {
func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffWithDifferOpts) error {
ddriver, ok := r.driver.(drivers.DriverWithDiffer)
if !ok {
return ErrNotSupported
@ -2382,20 +2512,39 @@ func (r *layerStore) ApplyDiffFromStagingDirectory(id, stagingDirectory string,
return ErrLayerUnknown
}
if options == nil {
options = &drivers.ApplyDiffOpts{
Mappings: r.layerMappings(layer),
MountLabel: layer.MountLabel,
options = &drivers.ApplyDiffWithDifferOpts{
ApplyDiffOpts: drivers.ApplyDiffOpts{
Mappings: r.layerMappings(layer),
MountLabel: layer.MountLabel,
},
Flags: nil,
}
}
err := ddriver.ApplyDiffFromStagingDirectory(layer.ID, layer.Parent, stagingDirectory, diffOutput, options)
err := ddriver.ApplyDiffFromStagingDirectory(layer.ID, layer.Parent, diffOutput, options)
if err != nil {
return err
}
layer.UIDs = diffOutput.UIDs
layer.GIDs = diffOutput.GIDs
updateDigestMap(&r.byuncompressedsum, layer.UncompressedDigest, diffOutput.UncompressedDigest, layer.ID)
layer.UncompressedDigest = diffOutput.UncompressedDigest
updateDigestMap(&r.bytocsum, diffOutput.TOCDigest, diffOutput.TOCDigest, layer.ID)
layer.TOCDigest = diffOutput.TOCDigest
layer.UncompressedSize = diffOutput.Size
layer.Metadata = diffOutput.Metadata
if options != nil && options.Flags != nil {
if layer.Flags == nil {
layer.Flags = make(map[string]interface{})
}
for k, v := range options.Flags {
layer.Flags[k] = v
}
}
if err = r.saveFor(layer); err != nil {
return err
}
if len(diffOutput.TarSplit) != 0 {
tsdata := bytes.Buffer{}
compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
@ -2425,14 +2574,11 @@ func (r *layerStore) ApplyDiffFromStagingDirectory(id, stagingDirectory string,
return err
}
}
if err = r.saveFor(layer); err != nil {
return err
}
return err
}
// Requires startWriting.
func (r *layerStore) ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) {
func (r *layerStore) ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffWithDifferOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) {
ddriver, ok := r.driver.(drivers.DriverWithDiffer)
if !ok {
return nil, ErrNotSupported
@ -2448,9 +2594,11 @@ func (r *layerStore) ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffOp
return nil, ErrLayerUnknown
}
if options == nil {
options = &drivers.ApplyDiffOpts{
Mappings: r.layerMappings(layer),
MountLabel: layer.MountLabel,
options = &drivers.ApplyDiffWithDifferOpts{
ApplyDiffOpts: drivers.ApplyDiffOpts{
Mappings: r.layerMappings(layer),
MountLabel: layer.MountLabel,
},
}
}
output, err := ddriver.ApplyDiffWithDiffer(layer.ID, layer.Parent, options, differ)
@ -2494,6 +2642,11 @@ func (r *layerStore) LayersByUncompressedDigest(d digest.Digest) ([]Layer, error
return r.layersByDigestMap(r.byuncompressedsum, d)
}
// Requires startReading or startWriting.
func (r *layerStore) LayersByTOCDigest(d digest.Digest) ([]Layer, error) {
return r.layersByDigestMap(r.bytocsum, d)
}
func closeAll(closes ...func() error) (rErr error) {
for _, f := range closes {
if err := f(); err != nil {

View file

@ -339,12 +339,43 @@ func (compression *Compression) Extension() string {
return ""
}
// nosysFileInfo hides the system-dependent info of the wrapped FileInfo to
// prevent tar.FileInfoHeader from introspecting it and potentially calling into
// glibc.
type nosysFileInfo struct {
os.FileInfo
}
func (fi nosysFileInfo) Sys() interface{} {
// A Sys value of type *tar.Header is safe as it is system-independent.
// The tar.FileInfoHeader function copies the fields into the returned
// header without performing any OS lookups.
if sys, ok := fi.FileInfo.Sys().(*tar.Header); ok {
return sys
}
return nil
}
// sysStatOverride, if non-nil, populates hdr from system-dependent fields of fi.
var sysStatOverride func(fi os.FileInfo, hdr *tar.Header) error
func fileInfoHeaderNoLookups(fi os.FileInfo, link string) (*tar.Header, error) {
if sysStatOverride == nil {
return tar.FileInfoHeader(fi, link)
}
hdr, err := tar.FileInfoHeader(nosysFileInfo{fi}, link)
if err != nil {
return nil, err
}
return hdr, sysStatOverride(fi, hdr)
}
// FileInfoHeader creates a populated Header from fi.
// Compared to archive pkg this function fills in more information.
// Also, regardless of Go version, this function fills file type bits (e.g. hdr.Mode |= modeISDIR),
// which have been deleted since Go 1.9 archive/tar.
func FileInfoHeader(name string, fi os.FileInfo, link string) (*tar.Header, error) {
hdr, err := tar.FileInfoHeader(fi, link)
hdr, err := fileInfoHeaderNoLookups(fi, link)
if err != nil {
return nil, err
}
@ -385,7 +416,7 @@ func ReadUserXattrToTarHeader(path string, hdr *tar.Header) error {
return err
}
for _, key := range xattrs {
if strings.HasPrefix(key, "user.") {
if strings.HasPrefix(key, "user.") && !strings.HasPrefix(key, "user.overlay.") {
value, err := system.Lgetxattr(path, key)
if err != nil {
if errors.Is(err, system.E2BIG) {
@ -477,7 +508,7 @@ func (ta *tarAppender) addTarFile(path, name string) error {
}
}
if fi.Mode()&os.ModeSocket != 0 {
logrus.Warnf("archive: skipping %q since it is a socket", path)
logrus.Infof("archive: skipping %q since it is a socket", path)
return nil
}
@ -534,6 +565,10 @@ func (ta *tarAppender) addTarFile(path, name string) error {
if ta.ChownOpts != nil {
hdr.Uid = ta.ChownOpts.UID
hdr.Gid = ta.ChownOpts.GID
// Dont expose the user names from the local system; they probably dont match the ta.ChownOpts value anyway,
// and they unnecessarily give recipients of the tar file potentially private data.
hdr.Uname = ""
hdr.Gname = ""
}
maybeTruncateHeaderModTime(hdr)

View file

@ -15,6 +15,31 @@ import (
"golang.org/x/sys/unix"
)
func init() {
sysStatOverride = statUnix
}
// statUnix populates hdr from system-dependent fields of fi without performing
// any OS lookups.
// Adapted from Moby.
func statUnix(fi os.FileInfo, hdr *tar.Header) error {
s, ok := fi.Sys().(*syscall.Stat_t)
if !ok {
return nil
}
hdr.Uid = int(s.Uid)
hdr.Gid = int(s.Gid)
if s.Mode&unix.S_IFBLK != 0 ||
s.Mode&unix.S_IFCHR != 0 {
hdr.Devmajor = int64(unix.Major(uint64(s.Rdev))) //nolint: unconvert
hdr.Devminor = int64(unix.Minor(uint64(s.Rdev))) //nolint: unconvert
}
return nil
}
// fixVolumePathPrefix does platform specific processing to ensure that if
// the path being passed in is not in a volume path format, convert it to one.
func fixVolumePathPrefix(srcPath string) string {

View file

@ -25,7 +25,7 @@ import (
const (
cacheKey = "chunked-manifest-cache"
cacheVersion = 1
cacheVersion = 2
digestSha256Empty = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
)
@ -207,9 +207,9 @@ func calculateHardLinkFingerprint(f *internal.FileMetadata) (string, error) {
return string(digester.Digest()), nil
}
// generateFileLocation generates a file location in the form $OFFSET@$PATH
func generateFileLocation(path string, offset uint64) []byte {
return []byte(fmt.Sprintf("%d@%s", offset, path))
// generateFileLocation generates a file location in the form $OFFSET:$LEN:$PATH
func generateFileLocation(path string, offset, len uint64) []byte {
return []byte(fmt.Sprintf("%d:%d:%s", offset, len, path))
}
// generateTag generates a tag in the form $DIGEST$OFFSET@LEN.
@ -245,7 +245,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
var tags []string
for _, k := range toc {
if k.Digest != "" {
location := generateFileLocation(k.Name, 0)
location := generateFileLocation(k.Name, 0, uint64(k.Size))
off := uint64(vdata.Len())
l := uint64(len(location))
@ -276,7 +276,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
digestLen = len(k.Digest)
}
if k.ChunkDigest != "" {
location := generateFileLocation(k.Name, uint64(k.ChunkOffset))
location := generateFileLocation(k.Name, uint64(k.ChunkOffset), uint64(k.ChunkSize))
off := uint64(vdata.Len())
l := uint64(len(location))
d := generateTag(k.ChunkDigest, off, l)
@ -490,7 +490,9 @@ func findTag(digest string, metadata *metadata) (string, uint64, uint64) {
if digest == d {
startOff := i*metadata.tagLen + metadata.digestLen
parts := strings.Split(string(metadata.tags[startOff:(i+1)*metadata.tagLen]), "@")
off, _ := strconv.ParseInt(parts[0], 10, 64)
len, _ := strconv.ParseInt(parts[1], 10, 64)
return digest, uint64(off), uint64(len)
}
@ -507,12 +509,16 @@ func (c *layersCache) findDigestInternal(digest string) (string, string, int64,
defer c.mutex.RUnlock()
for _, layer := range c.layers {
digest, off, len := findTag(digest, layer.metadata)
digest, off, tagLen := findTag(digest, layer.metadata)
if digest != "" {
position := string(layer.metadata.vdata[off : off+len])
parts := strings.SplitN(position, "@", 2)
position := string(layer.metadata.vdata[off : off+tagLen])
parts := strings.SplitN(position, ":", 3)
if len(parts) != 3 {
continue
}
offFile, _ := strconv.ParseInt(parts[0], 10, 64)
return layer.target, parts[1], offFile, nil
// parts[1] is the chunk length, currently unused.
return layer.target, parts[2], offFile, nil
}
}
@ -578,7 +584,10 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) {
return byteSliceAsString(buf.Bytes()[from:to])
}
iter = jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest)
pool := iter.Pool()
pool.ReturnIterator(iter)
iter = pool.BorrowIterator(manifest)
for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
if strings.ToLower(field) == "version" {
toc.Version = iter.ReadInt()
@ -657,8 +666,17 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) {
}
toc.Entries = append(toc.Entries, m)
}
break
}
// validate there is no extra data in the provided input. This is a security measure to avoid
// that the digest we calculate for the TOC refers to the entire document.
if iter.Error != nil && iter.Error != io.EOF {
return nil, iter.Error
}
if iter.WhatIsNext() != jsoniter.InvalidValue || !errors.Is(iter.Error, io.EOF) {
return nil, fmt.Errorf("unexpected data after manifest")
}
toc.StringsBuf = buf
return &toc, nil
}

View file

@ -257,8 +257,8 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
return decodedBlob, decodedTarSplit, int64(footerData.Offset), err
}
func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedUncompressedChecksum string) ([]byte, error) {
d, err := digest.Parse(expectedUncompressedChecksum)
func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string) ([]byte, error) {
d, err := digest.Parse(expectedCompressedChecksum)
if err != nil {
return nil, err
}

View file

@ -420,6 +420,14 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
zstdWriter.Close()
return err
}
// make sure the entire tarball is flushed to the output as it might contain
// some trailing zeros that affect the checksum.
if _, err := io.Copy(zstdWriter, its); err != nil {
zstdWriter.Close()
return err
}
if err := zstdWriter.Flush(); err != nil {
zstdWriter.Close()
return err
@ -452,12 +460,12 @@ type zstdChunkedWriter struct {
}
func (w zstdChunkedWriter) Close() error {
err := <-w.tarSplitErr
if err != nil {
w.tarSplitOut.Close()
errClose := w.tarSplitOut.Close()
if err := <-w.tarSplitErr; err != nil && err != io.EOF {
return err
}
return w.tarSplitOut.Close()
return errClose
}
func (w zstdChunkedWriter) Write(p []byte) (int, error) {

View file

@ -4,6 +4,7 @@ import (
"bufio"
"fmt"
"io"
"path/filepath"
"strings"
"time"
"unicode"
@ -93,13 +94,18 @@ func getStMode(mode uint32, typ string) (uint32, error) {
return mode, nil
}
func dumpNode(out io.Writer, links map[string]int, verityDigests map[string]string, entry *internal.FileMetadata) error {
path := entry.Name
if path == "" {
func sanitizeName(name string) string {
path := filepath.Clean(name)
if path == "." {
path = "/"
} else if path[0] != '/' {
path = "/" + path
}
return path
}
func dumpNode(out io.Writer, links map[string]int, verityDigests map[string]string, entry *internal.FileMetadata) error {
path := sanitizeName(entry.Name)
if _, err := fmt.Fprint(out, escaped(path, ESCAPE_STANDARD)); err != nil {
return err
@ -133,9 +139,10 @@ func dumpNode(out io.Writer, links map[string]int, verityDigests map[string]stri
var payload string
if entry.Linkname != "" {
payload = entry.Linkname
if entry.Type == internal.TypeLink && payload[0] != '/' {
payload = "/" + payload
if entry.Type == internal.TypeSymlink {
payload = entry.Linkname
} else {
payload = sanitizeName(entry.Linkname)
}
} else {
if len(entry.Digest) > 10 {
@ -198,10 +205,13 @@ func GenerateDump(tocI interface{}, verityDigests map[string]string) (io.Reader,
if e.Linkname == "" {
continue
}
if e.Type == internal.TypeSymlink {
continue
}
links[e.Linkname] = links[e.Linkname] + 1
}
if len(toc.Entries) == 0 || (toc.Entries[0].Name != "" && toc.Entries[0].Name != "/") {
if len(toc.Entries) == 0 || (sanitizeName(toc.Entries[0].Name) != "/") {
root := &internal.FileMetadata{
Name: "/",
Type: internal.TypeDir,

View file

@ -25,6 +25,7 @@ import (
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/compressor"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/fsverity"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/system"
"github.com/containers/storage/types"
@ -40,12 +41,14 @@ import (
const (
maxNumberMissingChunks = 1024
autoMergePartsThreshold = 128 // if the gap between two ranges is below this threshold, automatically merge them.
newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_EXCL | unix.O_WRONLY)
containersOverrideXattr = "user.containers.override_stat"
bigDataKey = "zstd-chunked-manifest"
chunkedData = "zstd-chunked-data"
chunkedLayerDataKey = "zstd-chunked-layer-data"
tocKey = "toc"
fsVerityDigestsKey = "fs-verity-digests"
fileTypeZstdChunked = iota
fileTypeEstargz
@ -71,11 +74,9 @@ type chunkedDiffer struct {
zstdReader *zstd.Decoder
rawReader io.Reader
// contentDigest is the digest of the uncompressed content
// (diffID) when the layer is fully retrieved. If the layer
// is not fully retrieved, instead of using the digest of the
// uncompressed content, it refers to the digest of the TOC.
contentDigest digest.Digest
// tocDigest is the digest of the TOC document when the layer
// is partially pulled.
tocDigest digest.Digest
// convertedToZstdChunked is set to true if the layer needs to
// be converted to the zstd:chunked format before it can be
@ -86,9 +87,18 @@ type chunkedDiffer struct {
// the layer are trusted and should not be validated.
skipValidation bool
// blobDigest is the digest of the whole compressed layer. It is used if
// convertToZstdChunked to validate a layer when it is converted since there
// is no TOC referenced by the manifest.
blobDigest digest.Digest
blobSize int64
storeOpts *types.StoreOptions
useFsVerity graphdriver.DifferFsVerity
fsVerityDigests map[string]string
fsVerityMutex sync.Mutex
}
var xattrsToIgnore = map[string]interface{}{
@ -188,33 +198,7 @@ func (f *seekableFile) GetBlobAt(chunks []ImageSourceChunk) (chan io.ReadCloser,
return streams, errs, nil
}
func convertTarToZstdChunked(destDirectory string, blobSize int64, iss ImageSourceSeekable) (*seekableFile, digest.Digest, map[string]string, error) {
var payload io.ReadCloser
var streams chan io.ReadCloser
var errs chan error
var err error
chunksToRequest := []ImageSourceChunk{
{
Offset: 0,
Length: uint64(blobSize),
},
}
streams, errs, err = iss.GetBlobAt(chunksToRequest)
if err != nil {
return nil, "", nil, err
}
select {
case p := <-streams:
payload = p
case err := <-errs:
return nil, "", nil, err
}
if payload == nil {
return nil, "", nil, errors.New("invalid stream returned")
}
func convertTarToZstdChunked(destDirectory string, payload *os.File) (*seekableFile, digest.Digest, map[string]string, error) {
diff, err := archive.DecompressStream(payload)
if err != nil {
return nil, "", nil, err
@ -235,10 +219,8 @@ func convertTarToZstdChunked(destDirectory string, blobSize int64, iss ImageSour
return nil, "", nil, err
}
digester := digest.Canonical.Digester()
hash := digester.Hash()
if _, err := io.Copy(io.MultiWriter(chunked, hash), diff); err != nil {
convertedOutputDigester := digest.Canonical.Digester()
if _, err := io.Copy(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff); err != nil {
f.Close()
return nil, "", nil, err
}
@ -249,27 +231,39 @@ func convertTarToZstdChunked(destDirectory string, blobSize int64, iss ImageSour
is := seekableFile{
file: f,
}
return &is, digester.Digest(), newAnnotations, nil
return &is, convertedOutputDigester.Digest(), newAnnotations, nil
}
// GetDiffer returns a differ than can be used with ApplyDiffWithDiffer.
func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
storeOpts, err := types.DefaultStoreOptionsAutoDetectUID()
func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
storeOpts, err := types.DefaultStoreOptions()
if err != nil {
return nil, err
}
if _, ok := annotations[internal.ManifestChecksumKey]; ok {
if !parseBooleanPullOption(&storeOpts, "enable_partial_images", true) {
return nil, errors.New("enable_partial_images not configured")
}
_, hasZstdChunkedTOC := annotations[internal.ManifestChecksumKey]
_, hasEstargzTOC := annotations[estargz.TOCJSONDigestAnnotation]
if hasZstdChunkedTOC && hasEstargzTOC {
return nil, errors.New("both zstd:chunked and eStargz TOC found")
}
if hasZstdChunkedTOC {
return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss, &storeOpts)
}
if _, ok := annotations[estargz.TOCJSONDigestAnnotation]; ok {
if hasEstargzTOC {
return makeEstargzChunkedDiffer(ctx, store, blobSize, annotations, iss, &storeOpts)
}
return makeConvertFromRawDiffer(ctx, store, blobSize, annotations, iss, &storeOpts)
return makeConvertFromRawDiffer(ctx, store, blobDigest, blobSize, annotations, iss, &storeOpts)
}
func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
if !parseBooleanPullOption(storeOpts, "convert_images", false) {
return nil, errors.New("convert_images not configured")
}
@ -280,6 +274,8 @@ func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobSize
}
return &chunkedDiffer{
fsVerityDigests: make(map[string]string),
blobDigest: blobDigest,
blobSize: blobSize,
convertToZstdChunked: true,
copyBuffer: makeCopyBuffer(),
@ -299,22 +295,23 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
return nil, err
}
contentDigest, err := digest.Parse(annotations[internal.ManifestChecksumKey])
tocDigest, err := digest.Parse(annotations[internal.ManifestChecksumKey])
if err != nil {
return nil, fmt.Errorf("parse TOC digest %q: %w", annotations[internal.ManifestChecksumKey], err)
}
return &chunkedDiffer{
blobSize: blobSize,
contentDigest: contentDigest,
copyBuffer: makeCopyBuffer(),
fileType: fileTypeZstdChunked,
layersCache: layersCache,
manifest: manifest,
storeOpts: storeOpts,
stream: iss,
tarSplit: tarSplit,
tocOffset: tocOffset,
fsVerityDigests: make(map[string]string),
blobSize: blobSize,
tocDigest: tocDigest,
copyBuffer: makeCopyBuffer(),
fileType: fileTypeZstdChunked,
layersCache: layersCache,
manifest: manifest,
storeOpts: storeOpts,
stream: iss,
tarSplit: tarSplit,
tocOffset: tocOffset,
}, nil
}
@ -328,21 +325,22 @@ func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize
return nil, err
}
contentDigest, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation])
tocDigest, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation])
if err != nil {
return nil, fmt.Errorf("parse TOC digest %q: %w", annotations[estargz.TOCJSONDigestAnnotation], err)
}
return &chunkedDiffer{
blobSize: blobSize,
contentDigest: contentDigest,
copyBuffer: makeCopyBuffer(),
fileType: fileTypeEstargz,
layersCache: layersCache,
manifest: manifest,
storeOpts: storeOpts,
stream: iss,
tocOffset: tocOffset,
fsVerityDigests: make(map[string]string),
blobSize: blobSize,
tocDigest: tocDigest,
copyBuffer: makeCopyBuffer(),
fileType: fileTypeEstargz,
layersCache: layersCache,
manifest: manifest,
storeOpts: storeOpts,
stream: iss,
tocOffset: tocOffset,
}, nil
}
@ -939,6 +937,8 @@ func (c *chunkedDiffer) appendCompressedStreamToFile(compression compressedFileT
return nil
}
type recordFsVerityFunc func(string, *os.File) error
type destinationFile struct {
digester digest.Digester
dirfd int
@ -948,9 +948,10 @@ type destinationFile struct {
options *archive.TarOptions
skipValidation bool
to io.Writer
recordFsVerity recordFsVerityFunc
}
func openDestinationFile(dirfd int, metadata *internal.FileMetadata, options *archive.TarOptions, skipValidation bool) (*destinationFile, error) {
func openDestinationFile(dirfd int, metadata *internal.FileMetadata, options *archive.TarOptions, skipValidation bool, recordFsVerity recordFsVerityFunc) (*destinationFile, error) {
file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
if err != nil {
return nil, err
@ -977,15 +978,32 @@ func openDestinationFile(dirfd int, metadata *internal.FileMetadata, options *ar
options: options,
dirfd: dirfd,
skipValidation: skipValidation,
recordFsVerity: recordFsVerity,
}, nil
}
func (d *destinationFile) Close() (Err error) {
defer func() {
err := d.file.Close()
var roFile *os.File
var err error
if d.recordFsVerity != nil {
roFile, err = reopenFileReadOnly(d.file)
if err == nil {
defer roFile.Close()
} else if Err == nil {
Err = err
}
}
err = d.file.Close()
if Err == nil {
Err = err
}
if Err == nil && roFile != nil {
Err = d.recordFsVerity(d.metadata.Name, roFile)
}
}()
if !d.skipValidation {
@ -1008,6 +1026,35 @@ func closeDestinationFiles(files chan *destinationFile, errors chan error) {
close(errors)
}
func (c *chunkedDiffer) recordFsVerity(path string, roFile *os.File) error {
if c.useFsVerity == graphdriver.DifferFsVerityDisabled {
return nil
}
// fsverity.EnableVerity doesn't return an error if fs-verity was already
// enabled on the file.
err := fsverity.EnableVerity(path, int(roFile.Fd()))
if err != nil {
if c.useFsVerity == graphdriver.DifferFsVerityRequired {
return err
}
// If it is not required, ignore the error if the filesystem does not support it.
if errors.Is(err, unix.ENOTSUP) || errors.Is(err, unix.ENOTTY) {
return nil
}
}
verity, err := fsverity.MeasureVerity(path, int(roFile.Fd()))
if err != nil {
return err
}
c.fsVerityMutex.Lock()
c.fsVerityDigests[path] = verity
c.fsVerityMutex.Unlock()
return nil
}
func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan error, dest string, dirfd int, missingParts []missingPart, options *archive.TarOptions) (Err error) {
var destFile *destinationFile
@ -1095,7 +1142,11 @@ func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan
}
filesToClose <- destFile
}
destFile, err = openDestinationFile(dirfd, mf.File, options, c.skipValidation)
recordFsVerity := c.recordFsVerity
if c.useFsVerity == graphdriver.DifferFsVerityDisabled {
recordFsVerity = nil
}
destFile, err = openDestinationFile(dirfd, mf.File, options, c.skipValidation, recordFsVerity)
if err != nil {
Err = err
goto exit
@ -1130,22 +1181,12 @@ func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan
}
func mergeMissingChunks(missingParts []missingPart, target int) []missingPart {
getGap := func(missingParts []missingPart, i int) int {
getGap := func(missingParts []missingPart, i int) uint64 {
prev := missingParts[i-1].SourceChunk.Offset + missingParts[i-1].SourceChunk.Length
return int(missingParts[i].SourceChunk.Offset - prev)
}
getCost := func(missingParts []missingPart, i int) int {
cost := getGap(missingParts, i)
if missingParts[i-1].OriginFile != nil {
cost += int(missingParts[i-1].SourceChunk.Length)
}
if missingParts[i].OriginFile != nil {
cost += int(missingParts[i].SourceChunk.Length)
}
return cost
return missingParts[i].SourceChunk.Offset - prev
}
// simple case: merge chunks from the same file.
// simple case: merge chunks from the same file. Useful to reduce the number of parts to work with later.
newMissingParts := missingParts[0:1]
prevIndex := 0
for i := 1; i < len(missingParts); i++ {
@ -1165,28 +1206,50 @@ func mergeMissingChunks(missingParts []missingPart, target int) []missingPart {
}
missingParts = newMissingParts
if len(missingParts) <= target {
return missingParts
type gap struct {
from int
to int
cost uint64
}
// this implementation doesn't account for duplicates, so it could merge
// more than necessary to reach the specified target. Since target itself
// is a heuristic value, it doesn't matter.
costs := make([]int, len(missingParts)-1)
for i := 1; i < len(missingParts); i++ {
costs[i-1] = getCost(missingParts, i)
var requestGaps []gap
lastOffset := int(-1)
numberSourceChunks := 0
for i, c := range missingParts {
if c.OriginFile != nil || c.Hole {
// it does not require a network request
continue
}
numberSourceChunks++
if lastOffset >= 0 {
prevEnd := missingParts[lastOffset].SourceChunk.Offset + missingParts[lastOffset].SourceChunk.Length
cost := c.SourceChunk.Offset - prevEnd
g := gap{
from: lastOffset,
to: i,
cost: cost,
}
requestGaps = append(requestGaps, g)
}
lastOffset = i
}
sort.Ints(costs)
toShrink := len(missingParts) - target
if toShrink >= len(costs) {
toShrink = len(costs) - 1
sort.Slice(requestGaps, func(i, j int) bool {
return requestGaps[i].cost < requestGaps[j].cost
})
toMergeMap := make([]bool, len(missingParts))
remainingToMerge := numberSourceChunks - target
for _, g := range requestGaps {
if remainingToMerge < 0 && g.cost > autoMergePartsThreshold {
continue
}
for i := g.from + 1; i <= g.to; i++ {
toMergeMap[i] = true
}
remainingToMerge--
}
targetValue := costs[toShrink]
newMissingParts = missingParts[0:1]
for i := 1; i < len(missingParts); i++ {
if getCost(missingParts, i) > targetValue {
if !toMergeMap[i] {
newMissingParts = append(newMissingParts, missingParts[i])
} else {
gap := getGap(missingParts, i)
@ -1218,6 +1281,7 @@ func (c *chunkedDiffer) retrieveMissingFiles(stream ImageSourceSeekable, dest st
}
}
missingParts = mergeMissingChunks(missingParts, maxNumberMissingChunks)
calculateChunksToRequest()
// There are some missing files. Prepare a multirange request for the missing chunks.
@ -1231,14 +1295,13 @@ func (c *chunkedDiffer) retrieveMissingFiles(stream ImageSourceSeekable, dest st
}
if _, ok := err.(ErrBadRequest); ok {
requested := len(missingParts)
// If the server cannot handle at least 64 chunks in a single request, just give up.
if requested < 64 {
if len(chunksToRequest) < 64 {
return err
}
// Merge more chunks to request
missingParts = mergeMissingChunks(missingParts, requested/2)
missingParts = mergeMissingChunks(missingParts, len(chunksToRequest)/2)
calculateChunksToRequest()
continue
}
@ -1426,15 +1489,39 @@ type findAndCopyFileOptions struct {
options *archive.TarOptions
}
func reopenFileReadOnly(f *os.File) (*os.File, error) {
path := fmt.Sprintf("/proc/self/fd/%d", f.Fd())
fd, err := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
return os.NewFile(uintptr(fd), f.Name()), nil
}
func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, copyOptions *findAndCopyFileOptions, mode os.FileMode) (bool, error) {
finalizeFile := func(dstFile *os.File) error {
if dstFile != nil {
defer dstFile.Close()
if err := setFileAttrs(dirfd, dstFile, mode, r, copyOptions.options, false); err != nil {
return err
}
if dstFile == nil {
return nil
}
return nil
err := setFileAttrs(dirfd, dstFile, mode, r, copyOptions.options, false)
if err != nil {
dstFile.Close()
return err
}
var roFile *os.File
if c.useFsVerity != graphdriver.DifferFsVerityDisabled {
roFile, err = reopenFileReadOnly(dstFile)
}
dstFile.Close()
if err != nil {
return err
}
if roFile == nil {
return nil
}
defer roFile.Close()
return c.recordFsVerity(r.Name, roFile)
}
found, dstFile, _, err := findFileInOtherLayers(c.layersCache, r, dirfd, copyOptions.useHardLinks)
@ -1491,6 +1578,43 @@ func makeEntriesFlat(mergedEntries []internal.FileMetadata) ([]internal.FileMeta
return new, nil
}
func (c *chunkedDiffer) copyAllBlobToFile(destination *os.File) (digest.Digest, error) {
var payload io.ReadCloser
var streams chan io.ReadCloser
var errs chan error
var err error
chunksToRequest := []ImageSourceChunk{
{
Offset: 0,
Length: uint64(c.blobSize),
},
}
streams, errs, err = c.stream.GetBlobAt(chunksToRequest)
if err != nil {
return "", err
}
select {
case p := <-streams:
payload = p
case err := <-errs:
return "", err
}
if payload == nil {
return "", errors.New("invalid stream returned")
}
originalRawDigester := digest.Canonical.Digester()
r := io.TeeReader(payload, originalRawDigester.Hash())
// copy the entire tarball and compute its digest
_, err = io.Copy(destination, r)
return originalRawDigester.Digest(), err
}
func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, differOpts *graphdriver.DifferOptions) (graphdriver.DriverWithDifferOutput, error) {
defer c.layersCache.release()
defer func() {
@ -1499,11 +1623,40 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}
}()
c.useFsVerity = differOpts.UseFsVerity
// stream to use for reading the zstd:chunked or Estargz file.
stream := c.stream
var uncompressedDigest digest.Digest
if c.convertToZstdChunked {
fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, c.blobSize, c.stream)
fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
blobFile := os.NewFile(uintptr(fd), "blob-file")
defer func() {
if blobFile != nil {
blobFile.Close()
}
}()
// calculate the checksum before accessing the file.
compressedDigest, err := c.copyAllBlobToFile(blobFile)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
if compressedDigest != c.blobDigest {
return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("invalid digest to convert: expected %q, got %q", c.blobDigest, compressedDigest)
}
if _, err := blobFile.Seek(0, io.SeekStart); err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, blobFile)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
@ -1511,6 +1664,10 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
// need to keep it open until the entire file is processed.
defer fileSource.Close()
// Close the file so that the file descriptor is released and the file is deleted.
blobFile.Close()
blobFile = nil
manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(fileSource, c.blobSize, annotations)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("read zstd:chunked manifest: %w", err)
@ -1523,12 +1680,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
c.fileType = fileTypeZstdChunked
c.manifest = manifest
c.tarSplit = tarSplit
// since we retrieved the whole file and it was validated, use the diffID instead of the TOC digest.
c.contentDigest = diffID
c.tocOffset = tocOffset
// the file was generated by us and the digest for each file was already computed, no need to validate it again.
c.skipValidation = true
// since we retrieved the whole file and it was validated, set the uncompressed digest.
uncompressedDigest = diffID
}
lcd := chunkedLayerData{
@ -1557,11 +1714,8 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
Artifacts: map[string]interface{}{
tocKey: toc,
},
TOCDigest: c.contentDigest,
}
if !parseBooleanPullOption(c.storeOpts, "enable_partial_images", false) {
return output, errors.New("enable_partial_images not configured")
TOCDigest: c.tocDigest,
UncompressedDigest: uncompressedDigest,
}
// When the hard links deduplication is used, file attributes are ignored because setting them
@ -1678,13 +1832,17 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
mode := os.FileMode(r.Mode)
r.Name = filepath.Clean(r.Name)
r.Linkname = filepath.Clean(r.Linkname)
t, err := typeToTarType(r.Type)
if err != nil {
return output, err
}
r.Name = filepath.Clean(r.Name)
// do not modify the value of symlinks
if r.Linkname != "" && t != tar.TypeSymlink {
r.Linkname = filepath.Clean(r.Linkname)
}
if whiteoutConverter != nil {
hdr := archivetar.Header{
Typeflag: t,
@ -1730,6 +1888,9 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}
case tar.TypeDir:
if r.Name == "" || r.Name == "." {
output.RootDirMode = &mode
}
if err := safeMkdir(dirfd, mode, r.Name, &r, options); err != nil {
return output, err
}
@ -1851,7 +2012,6 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}
// There are some missing files. Prepare a multirange request for the missing chunks.
if len(missingParts) > 0 {
missingParts = mergeMissingChunks(missingParts, maxNumberMissingChunks)
if err := c.retrieveMissingFiles(stream, dest, dirfd, missingParts, options); err != nil {
return output, err
}
@ -1867,6 +2027,8 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingPartsSize, totalChunksSize, float32(missingPartsSize*100.0)/float32(totalChunksSize))
}
output.Artifacts[fsVerityDigestsKey] = c.fsVerityDigests
return output, nil
}
@ -1926,7 +2088,10 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
e.Chunks = make([]*internal.FileMetadata, nChunks+1)
for j := 0; j <= nChunks; j++ {
e.Chunks[j] = &entries[i+j]
// we need a copy here, otherwise we override the
// .Size later
copy := entries[i+j]
e.Chunks[j] = &copy
e.EndOffset = entries[i+j].EndOffset
}
i += nChunks

View file

@ -9,9 +9,10 @@ import (
storage "github.com/containers/storage"
graphdriver "github.com/containers/storage/drivers"
digest "github.com/opencontainers/go-digest"
)
// GetDiffer returns a differ than can be used with ApplyDiffWithDiffer.
func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
return nil, errors.New("format not supported on this system")
}

View file

@ -0,0 +1,41 @@
package toc
import (
"errors"
"github.com/containers/storage/pkg/chunked/internal"
digest "github.com/opencontainers/go-digest"
)
// tocJSONDigestAnnotation is the annotation key for the digest of the estargz
// TOC JSON.
// It is defined in github.com/containerd/stargz-snapshotter/estargz as TOCJSONDigestAnnotation
// Duplicate it here to avoid a dependency on the package.
const tocJSONDigestAnnotation = "containerd.io/snapshot/stargz/toc.digest"
// GetTOCDigest returns the digest of the TOC as recorded in the annotations.
// This function retrieves a digest that represents the content of a
// table of contents (TOC) from the image's annotations.
// This is an experimental feature and may be changed/removed in the future.
func GetTOCDigest(annotations map[string]string) (*digest.Digest, error) {
d1, ok1 := annotations[tocJSONDigestAnnotation]
d2, ok2 := annotations[internal.ManifestChecksumKey]
switch {
case ok1 && ok2:
return nil, errors.New("both zstd:chunked and eStargz TOC found")
case ok1:
d, err := digest.Parse(d1)
if err != nil {
return nil, err
}
return &d, nil
case ok2:
d, err := digest.Parse(d2)
if err != nil {
return nil, err
}
return &d, nil
default:
return nil, nil
}
}

View file

@ -97,6 +97,8 @@ type OverlayOptionsConfig struct {
Inodes string `toml:"inodes,omitempty"`
// Do not create a bind mount on the storage home
SkipMountHome string `toml:"skip_mount_home,omitempty"`
// Specify whether composefs must be used to mount the data layers
UseComposefs string `toml:"use_composefs,omitempty"`
// ForceMask indicates the permissions mask (e.g. "0755") to use for new
// files and directories
ForceMask string `toml:"force_mask,omitempty"`
@ -147,6 +149,9 @@ type OptionsConfig struct {
// ignored when building an image.
IgnoreChownErrors string `toml:"ignore_chown_errors,omitempty"`
// Specify whether composefs must be used to mount the data layers
UseComposefs string `toml:"use_composefs,omitempty"`
// ForceMask indicates the permissions mask (e.g. "0755") to use for new
// files and directories.
ForceMask os.FileMode `toml:"force_mask,omitempty"`
@ -283,6 +288,7 @@ func GetGraphDriverOptions(driverName string, options OptionsConfig) []string {
}
case "overlay", "overlay2":
// Specify whether composefs must be used to mount the data layers
if options.Overlay.IgnoreChownErrors != "" {
doptions = append(doptions, fmt.Sprintf("%s.ignore_chown_errors=%s", driverName, options.Overlay.IgnoreChownErrors))
} else if options.IgnoreChownErrors != "" {
@ -316,6 +322,9 @@ func GetGraphDriverOptions(driverName string, options OptionsConfig) []string {
} else if options.ForceMask != 0 {
doptions = append(doptions, fmt.Sprintf("%s.force_mask=%s", driverName, options.ForceMask))
}
if options.Overlay.UseComposefs != "" {
doptions = append(doptions, fmt.Sprintf("%s.use_composefs=%s", driverName, options.Overlay.UseComposefs))
}
case "vfs":
if options.Vfs.IgnoreChownErrors != "" {
doptions = append(doptions, fmt.Sprintf("%s.ignore_chown_errors=%s", driverName, options.Vfs.IgnoreChownErrors))

View file

@ -0,0 +1,45 @@
package fsverity
import (
"errors"
"fmt"
"syscall"
"unsafe"
"golang.org/x/sys/unix"
)
// verityDigest struct represents the digest used for verifying the integrity of a file.
type verityDigest struct {
Fsv unix.FsverityDigest
Buf [64]byte
}
// EnableVerity enables the verity feature on a file represented by the file descriptor 'fd'. The file must be opened
// in read-only mode.
// The 'description' parameter is a human-readable description of the file.
func EnableVerity(description string, fd int) error {
enableArg := unix.FsverityEnableArg{
Version: 1,
Hash_algorithm: unix.FS_VERITY_HASH_ALG_SHA256,
Block_size: 4096,
}
_, _, e1 := syscall.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.FS_IOC_ENABLE_VERITY), uintptr(unsafe.Pointer(&enableArg)))
if e1 != 0 && !errors.Is(e1, unix.EEXIST) {
return fmt.Errorf("failed to enable verity for %q: %w", description, e1)
}
return nil
}
// MeasureVerity measures and returns the verity digest for the file represented by 'fd'.
// The 'description' parameter is a human-readable description of the file.
func MeasureVerity(description string, fd int) (string, error) {
var digest verityDigest
digest.Fsv.Size = 64
_, _, e1 := syscall.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.FS_IOC_MEASURE_VERITY), uintptr(unsafe.Pointer(&digest)))
if e1 != 0 {
return "", fmt.Errorf("failed to measure verity for %q: %w", description, e1)
}
return fmt.Sprintf("%x", digest.Buf[:digest.Fsv.Size]), nil
}

View file

@ -0,0 +1,21 @@
//go:build !linux
// +build !linux
package fsverity
import (
"fmt"
)
// EnableVerity enables the verity feature on a file represented by the file descriptor 'fd'. The file must be opened
// in read-only mode.
// The 'description' parameter is a human-readable description of the file.
func EnableVerity(description string, fd int) error {
return fmt.Errorf("fs-verity is not supported on this platform")
}
// MeasureVerity measures and returns the verity digest for the file represented by 'fd'.
// The 'description' parameter is a human-readable description of the file.
func MeasureVerity(description string, fd int) (string, error) {
return "", fmt.Errorf("fs-verity is not supported on this platform")
}

View file

@ -6,21 +6,6 @@ import (
"path/filepath"
)
// GetConfigHome returns XDG_CONFIG_HOME.
// GetConfigHome returns $HOME/.config and nil error if XDG_CONFIG_HOME is not set.
//
// See also https://standards.freedesktop.org/basedir-spec/latest/ar01s03.html
func GetConfigHome() (string, error) {
if xdgConfigHome := os.Getenv("XDG_CONFIG_HOME"); xdgConfigHome != "" {
return xdgConfigHome, nil
}
home := Get()
if home == "" {
return "", errors.New("could not get either XDG_CONFIG_HOME or HOME")
}
return filepath.Join(home, ".config"), nil
}
// GetDataHome returns XDG_DATA_HOME.
// GetDataHome returns $HOME/.local/share and nil error if XDG_DATA_HOME is not set.
//

View file

@ -1,5 +1,5 @@
//go:build !linux && !darwin && !freebsd
// +build !linux,!darwin,!freebsd
//go:build !linux && !darwin && !freebsd && !windows
// +build !linux,!darwin,!freebsd,!windows
package homedir
@ -8,6 +8,8 @@ package homedir
import (
"errors"
"os"
"path/filepath"
)
// GetRuntimeDir is unsupported on non-linux system.
@ -19,3 +21,18 @@ func GetRuntimeDir() (string, error) {
func StickRuntimeDirContents(files []string) ([]string, error) {
return nil, errors.New("homedir.StickRuntimeDirContents() is not supported on this system")
}
// GetConfigHome returns XDG_CONFIG_HOME.
// GetConfigHome returns $HOME/.config and nil error if XDG_CONFIG_HOME is not set.
//
// See also https://standards.freedesktop.org/basedir-spec/latest/ar01s03.html
func GetConfigHome() (string, error) {
if xdgConfigHome := os.Getenv("XDG_CONFIG_HOME"); xdgConfigHome != "" {
return xdgConfigHome, nil
}
home := Get()
if home == "" {
return "", errors.New("could not get either XDG_CONFIG_HOME or HOME")
}
return filepath.Join(home, ".config"), nil
}

View file

@ -7,12 +7,16 @@ package homedir
// NOTE: this package has originally been copied from github.com/docker/docker.
import (
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"github.com/containers/storage/pkg/unshare"
"github.com/sirupsen/logrus"
)
// Key returns the env var name for the user's home dir based on
@ -40,18 +44,6 @@ func GetShortcutString() string {
return "~"
}
// GetRuntimeDir returns XDG_RUNTIME_DIR.
// XDG_RUNTIME_DIR is typically configured via pam_systemd.
// GetRuntimeDir returns non-nil error if XDG_RUNTIME_DIR is not set.
//
// See also https://standards.freedesktop.org/basedir-spec/latest/ar01s03.html
func GetRuntimeDir() (string, error) {
if xdgRuntimeDir := os.Getenv("XDG_RUNTIME_DIR"); xdgRuntimeDir != "" {
return filepath.EvalSymlinks(xdgRuntimeDir)
}
return "", errors.New("could not get XDG_RUNTIME_DIR")
}
// StickRuntimeDirContents sets the sticky bit on files that are under
// XDG_RUNTIME_DIR, so that the files won't be periodically removed by the system.
//
@ -94,3 +86,98 @@ func stick(f string) error {
m |= os.ModeSticky
return os.Chmod(f, m)
}
var (
rootlessConfigHomeDirError error
rootlessConfigHomeDirOnce sync.Once
rootlessConfigHomeDir string
rootlessRuntimeDirOnce sync.Once
rootlessRuntimeDir string
)
// isWriteableOnlyByOwner checks that the specified permission mask allows write
// access only to the owner.
func isWriteableOnlyByOwner(perm os.FileMode) bool {
return (perm & 0o722) == 0o700
}
// GetConfigHome returns XDG_CONFIG_HOME.
// GetConfigHome returns $HOME/.config and nil error if XDG_CONFIG_HOME is not set.
//
// See also https://standards.freedesktop.org/basedir-spec/latest/ar01s03.html
func GetConfigHome() (string, error) {
rootlessConfigHomeDirOnce.Do(func() {
cfgHomeDir := os.Getenv("XDG_CONFIG_HOME")
if cfgHomeDir == "" {
home := Get()
resolvedHome, err := filepath.EvalSymlinks(home)
if err != nil {
rootlessConfigHomeDirError = fmt.Errorf("cannot resolve %s: %w", home, err)
return
}
tmpDir := filepath.Join(resolvedHome, ".config")
_ = os.MkdirAll(tmpDir, 0o700)
st, err := os.Stat(tmpDir)
if err != nil {
rootlessConfigHomeDirError = err
return
} else if int(st.Sys().(*syscall.Stat_t).Uid) == os.Geteuid() {
cfgHomeDir = tmpDir
} else {
rootlessConfigHomeDirError = fmt.Errorf("path %q exists and it is not owned by the current user", tmpDir)
return
}
}
rootlessConfigHomeDir = cfgHomeDir
})
return rootlessConfigHomeDir, rootlessConfigHomeDirError
}
// GetRuntimeDir returns a directory suitable to store runtime files.
// The function will try to use the XDG_RUNTIME_DIR env variable if it is set.
// XDG_RUNTIME_DIR is typically configured via pam_systemd.
// If XDG_RUNTIME_DIR is not set, GetRuntimeDir will try to find a suitable
// directory for the current user.
//
// See also https://standards.freedesktop.org/basedir-spec/latest/ar01s03.html
func GetRuntimeDir() (string, error) {
var rootlessRuntimeDirError error
rootlessRuntimeDirOnce.Do(func() {
runtimeDir := os.Getenv("XDG_RUNTIME_DIR")
if runtimeDir != "" {
rootlessRuntimeDir, rootlessRuntimeDirError = filepath.EvalSymlinks(runtimeDir)
return
}
uid := strconv.Itoa(unshare.GetRootlessUID())
if runtimeDir == "" {
tmpDir := filepath.Join("/run", "user", uid)
if err := os.MkdirAll(tmpDir, 0o700); err != nil {
logrus.Debug(err)
}
st, err := os.Lstat(tmpDir)
if err == nil && int(st.Sys().(*syscall.Stat_t).Uid) == os.Geteuid() && isWriteableOnlyByOwner(st.Mode().Perm()) {
runtimeDir = tmpDir
}
}
if runtimeDir == "" {
tmpDir := filepath.Join(os.TempDir(), fmt.Sprintf("storage-run-%s", uid))
if err := os.MkdirAll(tmpDir, 0o700); err != nil {
logrus.Debug(err)
}
st, err := os.Lstat(tmpDir)
if err == nil && int(st.Sys().(*syscall.Stat_t).Uid) == os.Geteuid() && isWriteableOnlyByOwner(st.Mode().Perm()) {
runtimeDir = tmpDir
} else {
rootlessRuntimeDirError = fmt.Errorf("path %q exists and it is not writeable only by the current user", tmpDir)
return
}
}
rootlessRuntimeDir = runtimeDir
})
return rootlessRuntimeDir, rootlessRuntimeDirError
}

View file

@ -5,6 +5,7 @@ package homedir
import (
"os"
"path/filepath"
)
// Key returns the env var name for the user's home dir based on
@ -25,8 +26,36 @@ func Get() string {
return home
}
// GetConfigHome returns the home directory of the current user with the help of
// environment variables depending on the target operating system.
// Returned path should be used with "path/filepath" to form new paths.
func GetConfigHome() (string, error) {
return filepath.Join(Get(), ".config"), nil
}
// GetShortcutString returns the string that is shortcut to user's home directory
// in the native shell of the platform running on.
func GetShortcutString() string {
return "%USERPROFILE%" // be careful while using in format functions
}
// StickRuntimeDirContents is a no-op on Windows
func StickRuntimeDirContents(files []string) ([]string, error) {
return nil, nil
}
// GetRuntimeDir returns a directory suitable to store runtime files.
// The function will try to use the XDG_RUNTIME_DIR env variable if it is set.
// XDG_RUNTIME_DIR is typically configured via pam_systemd.
// If XDG_RUNTIME_DIR is not set, GetRuntimeDir will try to find a suitable
// directory for the current user.
//
// See also https://standards.freedesktop.org/basedir-spec/latest/ar01s03.html
func GetRuntimeDir() (string, error) {
data, err := GetDataHome()
if err != nil {
return "", err
}
runtimeDir := filepath.Join(data, "containers", "storage")
return runtimeDir, nil
}

View file

@ -14,7 +14,7 @@ import (
"syscall"
"github.com/containers/storage/pkg/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/moby/sys/user"
)
var (

View file

@ -17,7 +17,7 @@ const (
// IsRootless tells us if we are running in rootless mode
func IsRootless() bool {
return false
return os.Getuid() != 0
}
// GetRootlessUID returns the UID of the user in the parent userNS

View file

@ -59,7 +59,7 @@ additionalimagestores = [
# can deduplicate pulling of content, disk storage of content and can allow the
# kernel to use less memory when running containers.
# containers/storage supports three keys
# containers/storage supports four keys
# * enable_partial_images="true" | "false"
# Tells containers/storage to look for files previously pulled in storage
# rather then always pulling them from the container registry.
@ -70,7 +70,12 @@ additionalimagestores = [
# Tells containers/storage where an ostree repository exists that might have
# previously pulled content which can be used when attempting to avoid
# pulling content from the container registry
pull_options = {enable_partial_images = "false", use_hard_links = "false", ostree_repos=""}
# * convert_images = "false" | "true"
# If set to true, containers/storage will convert images to a
# format compatible with partial pulls in order to take advantage
# of local deduplication and hard linking. It is an expensive
# operation so it is not enabled by default.
pull_options = {enable_partial_images = "true", use_hard_links = "false", ostree_repos=""}
# Remap-UIDs/GIDs is the mapping from UIDs/GIDs as they should appear inside of
# a container, to the UIDs/GIDs as they should appear outside of the container,
@ -130,6 +135,9 @@ mountopt = "nodev"
# Set to skip a PRIVATE bind mount on the storage home directory.
# skip_mount_home = "false"
# Set to use composefs to mount data layers with overlay.
# use_composefs = "false"
# Size is used to set a maximum size of the container image.
# size = ""

View file

@ -96,6 +96,9 @@ mountopt = "nodev"
# Set to skip a PRIVATE bind mount on the storage home directory.
# skip_mount_home = "false"
# Set to use composefs to mount data layers with overlay.
# use_composefs = "false"
# Size is used to set a maximum size of the container image.
# size = ""

View file

@ -1,6 +1,7 @@
package storage
import (
_ "embed"
"encoding/base64"
"errors"
"fmt"
@ -10,6 +11,7 @@ import (
"reflect"
"strings"
"sync"
"syscall"
"time"
// register all of the built-in drivers
@ -69,6 +71,19 @@ type metadataStore interface {
rwMetadataStore
}
// ApplyStagedLayerOptions contains options to pass to ApplyStagedLayer
type ApplyStagedLayerOptions struct {
ID string // Mandatory
ParentLayer string // Optional
Names []string // Optional
MountLabel string // Optional
Writeable bool // Optional
LayerOptions *LayerOptions // Optional
DiffOutput *drivers.DriverWithDifferOutput // Mandatory
DiffOptions *drivers.ApplyDiffWithDifferOpts // Mandatory
}
// An roBigDataStore wraps up the read-only big-data related methods of the
// various types of file-based lookaside stores that we implement.
type roBigDataStore interface {
@ -313,14 +328,24 @@ type Store interface {
// ApplyDiffer applies a diff to a layer.
// It is the caller responsibility to clean the staging directory if it is not
// successfully applied with ApplyDiffFromStagingDirectory.
ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error)
ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffWithDifferOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error)
// ApplyDiffFromStagingDirectory uses stagingDirectory to create the diff.
ApplyDiffFromStagingDirectory(to, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffOpts) error
// Deprecated: it will be removed soon. Use ApplyStagedLayer instead.
ApplyDiffFromStagingDirectory(to, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffWithDifferOpts) error
// CleanupStagingDirectory cleanups the staging directory. It can be used to cleanup the staging directory on errors
// Deprecated: it will be removed soon. Use CleanupStagedLayer instead.
CleanupStagingDirectory(stagingDirectory string) error
// ApplyStagedLayer combines the functions of CreateLayer and ApplyDiffFromStagingDirectory,
// marking the layer for automatic removal if applying the diff fails
// for any reason.
ApplyStagedLayer(args ApplyStagedLayerOptions) (*Layer, error)
// CleanupStagedLayer cleanups the staging directory. It can be used to cleanup the staging directory on errors
CleanupStagedLayer(diffOutput *drivers.DriverWithDifferOutput) error
// DifferTarget gets the path to the differ target.
DifferTarget(id string) (string, error)
@ -332,6 +357,10 @@ type Store interface {
// specified uncompressed digest value recorded for them.
LayersByUncompressedDigest(d digest.Digest) ([]Layer, error)
// LayersByTOCDigest returns a slice of the layers with the
// specified TOC digest value recorded for them.
LayersByTOCDigest(d digest.Digest) ([]Layer, error)
// LayerSize returns a cached approximation of the layer's size, or -1
// if we don't have a value on hand.
LayerSize(id string) (int64, error)
@ -391,6 +420,18 @@ type Store interface {
// allow ImagesByDigest to find images by their correct digests.
SetImageBigData(id, key string, data []byte, digestManifest func([]byte) (digest.Digest, error)) error
// ImageDirectory returns a path of a directory which the caller can
// use to store data, specific to the image, which the library does not
// directly manage. The directory will be deleted when the image is
// deleted.
ImageDirectory(id string) (string, error)
// ImageRunDirectory returns a path of a directory which the caller can
// use to store data, specific to the image, which the library does not
// directly manage. The directory will be deleted when the host system
// is restarted.
ImageRunDirectory(id string) (string, error)
// ListLayerBigData retrieves a list of the (possibly large) chunks of
// named data associated with a layer.
ListLayerBigData(id string) ([]string, error)
@ -562,10 +603,19 @@ type LayerOptions struct {
// initialize this layer. If set, it should be a child of the layer
// which we want to use as the parent of the new layer.
TemplateLayer string
// OriginalDigest specifies a digest of the tarstream (diff), if one is
// OriginalDigest specifies a digest of the (possibly-compressed) tarstream (diff), if one is
// provided along with these LayerOptions, and reliably known by the caller.
// The digest might not be exactly the digest of the provided tarstream
// (e.g. the digest might be of a compressed representation, while providing
// an uncompressed one); in that case the caller is responsible for the two matching.
// Use the default "" if this fields is not applicable or the value is not known.
OriginalDigest digest.Digest
// OriginalSize specifies a size of the (possibly-compressed) tarstream corresponding
// to OriginalDigest.
// If the digest does not match the provided tarstream, OriginalSize must match OriginalDigest,
// not the tarstream.
// Use nil if not applicable or not known.
OriginalSize *int64
// UncompressedDigest specifies a digest of the uncompressed version (“DiffID”)
// of the tarstream (diff), if one is provided along with these LayerOptions,
// and reliably known by the caller.
@ -922,11 +972,13 @@ func (s *store) load() error {
if err := os.MkdirAll(gipath, 0o700); err != nil {
return err
}
ris, err := newImageStore(gipath)
imageStore, err := newImageStore(gipath)
if err != nil {
return err
}
s.imageStore = ris
s.imageStore = imageStore
s.rwImageStores = []rwImageStore{imageStore}
gcpath := filepath.Join(s.graphRoot, driverPrefix+"containers")
if err := os.MkdirAll(gcpath, 0o700); err != nil {
@ -944,13 +996,16 @@ func (s *store) load() error {
s.containerStore = rcs
for _, store := range driver.AdditionalImageStores() {
additionalImageStores := s.graphDriver.AdditionalImageStores()
if s.imageStoreDir != "" {
additionalImageStores = append([]string{s.graphRoot}, additionalImageStores...)
}
for _, store := range additionalImageStores {
gipath := filepath.Join(store, driverPrefix+"images")
var ris roImageStore
if s.imageStoreDir != "" && store == s.graphRoot {
// If --imagestore was set and current store
// is `graphRoot` then mount it as a `rw` additional
// store instead of `readonly` additional store.
// both the graphdriver and the imagestore must be used read-write.
if store == s.imageStoreDir || store == s.graphRoot {
imageStore, err := newImageStore(gipath)
if err != nil {
return err
@ -960,6 +1015,10 @@ func (s *store) load() error {
} else {
ris, err = newROImageStore(gipath)
if err != nil {
if errors.Is(err, syscall.EROFS) {
logrus.Debugf("Ignoring creation of lockfiles on read-only file systems %q, %v", gipath, err)
continue
}
return err
}
}
@ -1031,15 +1090,9 @@ func (s *store) stopUsingGraphDriver() {
// Almost all users should use startUsingGraphDriver instead.
// The caller must hold s.graphLock.
func (s *store) createGraphDriverLocked() (drivers.Driver, error) {
driverRoot := s.imageStoreDir
imageStoreBase := s.graphRoot
if driverRoot == "" {
driverRoot = s.graphRoot
imageStoreBase = ""
}
config := drivers.Options{
Root: driverRoot,
ImageStore: imageStoreBase,
Root: s.graphRoot,
ImageStore: s.imageStoreDir,
RunRoot: s.runRoot,
DriverPriority: s.graphDriverPriority,
DriverOptions: s.graphOptions,
@ -1069,15 +1122,15 @@ func (s *store) getLayerStoreLocked() (rwLayerStore, error) {
if err := os.MkdirAll(rlpath, 0o700); err != nil {
return nil, err
}
imgStoreRoot := s.imageStoreDir
if imgStoreRoot == "" {
imgStoreRoot = s.graphRoot
}
glpath := filepath.Join(imgStoreRoot, driverPrefix+"layers")
glpath := filepath.Join(s.graphRoot, driverPrefix+"layers")
if err := os.MkdirAll(glpath, 0o700); err != nil {
return nil, err
}
rls, err := s.newLayerStore(rlpath, glpath, s.graphDriver, s.transientStore)
ilpath := ""
if s.imageStoreDir != "" {
ilpath = filepath.Join(s.imageStoreDir, driverPrefix+"layers")
}
rls, err := s.newLayerStore(rlpath, glpath, ilpath, s.graphDriver, s.transientStore)
if err != nil {
return nil, err
}
@ -1108,8 +1161,10 @@ func (s *store) getROLayerStoresLocked() ([]roLayerStore, error) {
if err := os.MkdirAll(rlpath, 0o700); err != nil {
return nil, err
}
for _, store := range s.graphDriver.AdditionalImageStores() {
glpath := filepath.Join(store, driverPrefix+"layers")
rls, err := newROLayerStore(rlpath, glpath, s.graphDriver)
if err != nil {
return nil, err
@ -1390,8 +1445,7 @@ func (s *store) canUseShifting(uidmap, gidmap []idtools.IDMap) bool {
return true
}
func (s *store) PutLayer(id, parent string, names []string, mountLabel string, writeable bool, lOptions *LayerOptions, diff io.Reader) (*Layer, int64, error) {
var parentLayer *Layer
func (s *store) putLayer(id, parent string, names []string, mountLabel string, writeable bool, lOptions *LayerOptions, diff io.Reader, slo *stagedLayerOptions) (*Layer, int64, error) {
rlstore, rlstores, err := s.bothLayerStoreKinds()
if err != nil {
return nil, -1, err
@ -1404,6 +1458,8 @@ func (s *store) PutLayer(id, parent string, names []string, mountLabel string, w
return nil, -1, err
}
defer s.containerStore.stopWriting()
var parentLayer *Layer
var options LayerOptions
if lOptions != nil {
options = *lOptions
@ -1463,6 +1519,7 @@ func (s *store) PutLayer(id, parent string, names []string, mountLabel string, w
}
layerOptions := LayerOptions{
OriginalDigest: options.OriginalDigest,
OriginalSize: options.OriginalSize,
UncompressedDigest: options.UncompressedDigest,
Flags: options.Flags,
}
@ -1476,7 +1533,11 @@ func (s *store) PutLayer(id, parent string, names []string, mountLabel string, w
GIDMap: copyIDMap(gidMap),
}
}
return rlstore.create(id, parentLayer, names, mountLabel, nil, &layerOptions, writeable, diff)
return rlstore.create(id, parentLayer, names, mountLabel, nil, &layerOptions, writeable, diff, slo)
}
func (s *store) PutLayer(id, parent string, names []string, mountLabel string, writeable bool, lOptions *LayerOptions, diff io.Reader) (*Layer, int64, error) {
return s.putLayer(id, parent, names, mountLabel, writeable, lOptions, diff, nil)
}
func (s *store) CreateLayer(id, parent string, names []string, mountLabel string, writeable bool, options *LayerOptions) (*Layer, error) {
@ -1686,7 +1747,7 @@ func (s *store) imageTopLayerForMapping(image *Image, ristore roImageStore, rlst
}
}
layerOptions.TemplateLayer = layer.ID
mappedLayer, _, err := rlstore.create("", parentLayer, nil, layer.MountLabel, nil, &layerOptions, false, nil)
mappedLayer, _, err := rlstore.create("", parentLayer, nil, layer.MountLabel, nil, &layerOptions, false, nil, nil)
if err != nil {
return nil, fmt.Errorf("creating an ID-mapped copy of layer %q: %w", layer.ID, err)
}
@ -1857,7 +1918,7 @@ func (s *store) CreateContainer(id string, names []string, image, layer, metadat
options.Flags[mountLabelFlag] = mountLabel
}
clayer, _, err := rlstore.create(layer, imageTopLayer, nil, mlabel, options.StorageOpt, layerOptions, true, nil)
clayer, _, err := rlstore.create(layer, imageTopLayer, nil, mlabel, options.StorageOpt, layerOptions, true, nil, nil)
if err != nil {
return nil, err
}
@ -2530,7 +2591,7 @@ func (s *store) DeleteImage(id string, commit bool) (layers []string, err error)
if err := s.writeToAllStores(func(rlstore rwLayerStore) error {
// Delete image from all available imagestores configured to be used.
imageFound := false
for _, is := range append([]rwImageStore{s.imageStore}, s.rwImageStores...) {
for _, is := range s.rwImageStores {
if is != s.imageStore {
// This is an additional writeable image store
// so we must perform lock
@ -2741,7 +2802,13 @@ func (s *store) Status() ([][2]string, error) {
return rlstore.Status()
}
//go:embed VERSION
var storageVersion string
func (s *store) Version() ([][2]string, error) {
if trimmedVersion := strings.TrimSpace(storageVersion); trimmedVersion != "" {
return [][2]string{{"Version", trimmedVersion}}, nil
}
return [][2]string{}, nil
}
@ -2915,16 +2982,29 @@ func (s *store) Diff(from, to string, options *DiffOptions) (io.ReadCloser, erro
return nil, ErrLayerUnknown
}
func (s *store) ApplyDiffFromStagingDirectory(to, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffOpts) error {
func (s *store) ApplyDiffFromStagingDirectory(to, stagingDirectory string, diffOutput *drivers.DriverWithDifferOutput, options *drivers.ApplyDiffWithDifferOpts) error {
if stagingDirectory != diffOutput.Target {
return fmt.Errorf("invalid value for staging directory, it must be the same as the differ target directory")
}
_, err := writeToLayerStore(s, func(rlstore rwLayerStore) (struct{}, error) {
if !rlstore.Exists(to) {
return struct{}{}, ErrLayerUnknown
}
return struct{}{}, rlstore.ApplyDiffFromStagingDirectory(to, stagingDirectory, diffOutput, options)
return struct{}{}, rlstore.applyDiffFromStagingDirectory(to, diffOutput, options)
})
return err
}
func (s *store) ApplyStagedLayer(args ApplyStagedLayerOptions) (*Layer, error) {
slo := stagedLayerOptions{
DiffOutput: args.DiffOutput,
DiffOptions: args.DiffOptions,
}
layer, _, err := s.putLayer(args.ID, args.ParentLayer, args.Names, args.MountLabel, args.Writeable, args.LayerOptions, nil, &slo)
return layer, err
}
func (s *store) CleanupStagingDirectory(stagingDirectory string) error {
_, err := writeToLayerStore(s, func(rlstore rwLayerStore) (struct{}, error) {
return struct{}{}, rlstore.CleanupStagingDirectory(stagingDirectory)
@ -2932,7 +3012,14 @@ func (s *store) CleanupStagingDirectory(stagingDirectory string) error {
return err
}
func (s *store) ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) {
func (s *store) CleanupStagedLayer(diffOutput *drivers.DriverWithDifferOutput) error {
_, err := writeToLayerStore(s, func(rlstore rwLayerStore) (struct{}, error) {
return struct{}{}, rlstore.CleanupStagingDirectory(diffOutput.Target)
})
return err
}
func (s *store) ApplyDiffWithDiffer(to string, options *drivers.ApplyDiffWithDifferOpts, differ drivers.Differ) (*drivers.DriverWithDifferOutput, error) {
return writeToLayerStore(s, func(rlstore rwLayerStore) (*drivers.DriverWithDifferOutput, error) {
if to != "" && !rlstore.Exists(to) {
return nil, ErrLayerUnknown
@ -2994,6 +3081,13 @@ func (s *store) LayersByUncompressedDigest(d digest.Digest) ([]Layer, error) {
return s.layersByMappedDigest(func(r roLayerStore, d digest.Digest) ([]Layer, error) { return r.LayersByUncompressedDigest(d) }, d)
}
func (s *store) LayersByTOCDigest(d digest.Digest) ([]Layer, error) {
if err := d.Validate(); err != nil {
return nil, fmt.Errorf("looking for TOC matching digest %q: %w", d, err)
}
return s.layersByMappedDigest(func(r roLayerStore, d digest.Digest) ([]Layer, error) { return r.LayersByTOCDigest(d) }, d)
}
func (s *store) LayerSize(id string) (int64, error) {
if res, done, err := readAllLayerStores(s, func(store roLayerStore) (int64, bool, error) {
if store.Exists(id) {
@ -3288,6 +3382,27 @@ func (s *store) ContainerByLayer(id string) (*Container, error) {
return nil, ErrContainerUnknown
}
func (s *store) ImageDirectory(id string) (string, error) {
foundImage := false
if res, done, err := readAllImageStores(s, func(store roImageStore) (string, bool, error) {
if store.Exists(id) {
foundImage = true
}
middleDir := s.graphDriverName + "-images"
gipath := filepath.Join(s.GraphRoot(), middleDir, id, "userdata")
if err := os.MkdirAll(gipath, 0o700); err != nil {
return "", true, err
}
return gipath, true, nil
}); done {
return res, err
}
if foundImage {
return "", fmt.Errorf("locating image with ID %q (consider removing the image to resolve the issue): %w", id, os.ErrNotExist)
}
return "", fmt.Errorf("locating image with ID %q: %w", id, ErrImageUnknown)
}
func (s *store) ContainerDirectory(id string) (string, error) {
res, _, err := readContainerStore(s, func() (string, bool, error) {
id, err := s.containerStore.Lookup(id)
@ -3305,6 +3420,28 @@ func (s *store) ContainerDirectory(id string) (string, error) {
return res, err
}
func (s *store) ImageRunDirectory(id string) (string, error) {
foundImage := false
if res, done, err := readAllImageStores(s, func(store roImageStore) (string, bool, error) {
if store.Exists(id) {
foundImage = true
}
middleDir := s.graphDriverName + "-images"
rcpath := filepath.Join(s.RunRoot(), middleDir, id, "userdata")
if err := os.MkdirAll(rcpath, 0o700); err != nil {
return "", true, err
}
return rcpath, true, nil
}); done {
return res, err
}
if foundImage {
return "", fmt.Errorf("locating image with ID %q (consider removing the image to resolve the issue): %w", id, os.ErrNotExist)
}
return "", fmt.Errorf("locating image with ID %q: %w", id, ErrImageUnknown)
}
func (s *store) ContainerRunDirectory(id string) (string, error) {
res, _, err := readContainerStore(s, func() (string, bool, error) {
id, err := s.containerStore.Lookup(id)
@ -3545,8 +3682,8 @@ func SetDefaultConfigFilePath(path string) {
}
// DefaultConfigFile returns the path to the storage config file used
func DefaultConfigFile(rootless bool) (string, error) {
return types.DefaultConfigFile(rootless)
func DefaultConfigFile() (string, error) {
return types.DefaultConfigFile()
}
// ReloadConfigurationFile parses the specified configuration file and overrides

View file

@ -11,7 +11,9 @@ import (
"github.com/BurntSushi/toml"
cfg "github.com/containers/storage/pkg/config"
"github.com/containers/storage/pkg/homedir"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/unshare"
"github.com/sirupsen/logrus"
)
@ -87,7 +89,7 @@ func loadDefaultStoreOptions() {
_, err := os.Stat(defaultOverrideConfigFile)
if err == nil {
// The DefaultConfigFile(rootless) function returns the path
// The DefaultConfigFile() function returns the path
// of the used storage.conf file, by returning defaultConfigFile
// If override exists containers/storage uses it by default.
defaultConfigFile = defaultOverrideConfigFile
@ -109,21 +111,41 @@ func loadDefaultStoreOptions() {
setDefaults()
}
// defaultStoreOptionsIsolated is an internal implementation detail of DefaultStoreOptions to allow testing.
// Everyone but the tests this is intended for should only call DefaultStoreOptions, never this function.
func defaultStoreOptionsIsolated(rootless bool, rootlessUID int, storageConf string) (StoreOptions, error) {
// loadStoreOptions returns the default storage ops for containers
func loadStoreOptions() (StoreOptions, error) {
storageConf, err := DefaultConfigFile()
if err != nil {
return defaultStoreOptions, err
}
return loadStoreOptionsFromConfFile(storageConf)
}
// usePerUserStorage returns whether the user private storage must be used.
// We cannot simply use the unshare.IsRootless() condition, because
// that checks only if the current process needs a user namespace to
// work and it would break cases where the process is already created
// in a user namespace (e.g. nested Podman/Buildah) and the desired
// behavior is to use system paths instead of user private paths.
func usePerUserStorage() bool {
return unshare.IsRootless() && unshare.GetRootlessUID() != 0
}
// loadStoreOptionsFromConfFile is an internal implementation detail of DefaultStoreOptions to allow testing.
// Everyone but the tests this is intended for should only call loadStoreOptions, never this function.
func loadStoreOptionsFromConfFile(storageConf string) (StoreOptions, error) {
var (
defaultRootlessRunRoot string
defaultRootlessGraphRoot string
err error
)
defaultStoreOptionsOnce.Do(loadDefaultStoreOptions)
if loadDefaultStoreOptionsErr != nil {
return StoreOptions{}, loadDefaultStoreOptionsErr
}
storageOpts := defaultStoreOptions
if rootless && rootlessUID != 0 {
storageOpts, err = getRootlessStorageOpts(rootlessUID, storageOpts)
if usePerUserStorage() {
storageOpts, err = getRootlessStorageOpts(storageOpts)
if err != nil {
return storageOpts, err
}
@ -137,7 +159,7 @@ func defaultStoreOptionsIsolated(rootless bool, rootlessUID int, storageConf str
defaultRootlessGraphRoot = storageOpts.GraphRoot
storageOpts = StoreOptions{}
reloadConfigurationFileIfNeeded(storageConf, &storageOpts)
if rootless && rootlessUID != 0 {
if usePerUserStorage() {
// If the file did not specify a graphroot or runroot,
// set sane defaults so we don't try and use root-owned
// directories
@ -156,6 +178,7 @@ func defaultStoreOptionsIsolated(rootless bool, rootlessUID int, storageConf str
if storageOpts.RunRoot == "" {
return storageOpts, fmt.Errorf("runroot must be set")
}
rootlessUID := unshare.GetRootlessUID()
runRoot, err := expandEnvPath(storageOpts.RunRoot, rootlessUID)
if err != nil {
return storageOpts, err
@ -186,26 +209,17 @@ func defaultStoreOptionsIsolated(rootless bool, rootlessUID int, storageConf str
return storageOpts, nil
}
// loadStoreOptions returns the default storage ops for containers
func loadStoreOptions(rootless bool, rootlessUID int) (StoreOptions, error) {
storageConf, err := DefaultConfigFile(rootless && rootlessUID != 0)
if err != nil {
return defaultStoreOptions, err
}
return defaultStoreOptionsIsolated(rootless, rootlessUID, storageConf)
}
// UpdateOptions should be called iff container engine received a SIGHUP,
// otherwise use DefaultStoreOptions
func UpdateStoreOptions(rootless bool, rootlessUID int) (StoreOptions, error) {
storeOptions, storeError = loadStoreOptions(rootless, rootlessUID)
func UpdateStoreOptions() (StoreOptions, error) {
storeOptions, storeError = loadStoreOptions()
return storeOptions, storeError
}
// DefaultStoreOptions returns the default storage ops for containers
func DefaultStoreOptions(rootless bool, rootlessUID int) (StoreOptions, error) {
func DefaultStoreOptions() (StoreOptions, error) {
once.Do(func() {
storeOptions, storeError = loadStoreOptions(rootless, rootlessUID)
storeOptions, storeError = loadStoreOptions()
})
return storeOptions, storeError
}
@ -270,14 +284,26 @@ func isRootlessDriver(driver string) bool {
}
// getRootlessStorageOpts returns the storage opts for containers running as non root
func getRootlessStorageOpts(rootlessUID int, systemOpts StoreOptions) (StoreOptions, error) {
func getRootlessStorageOpts(systemOpts StoreOptions) (StoreOptions, error) {
var opts StoreOptions
dataDir, rootlessRuntime, err := getRootlessDirInfo(rootlessUID)
rootlessUID := unshare.GetRootlessUID()
dataDir, err := homedir.GetDataHome()
if err != nil {
return opts, err
}
opts.RunRoot = rootlessRuntime
rootlessRuntime, err := homedir.GetRuntimeDir()
if err != nil {
return opts, err
}
opts.RunRoot = filepath.Join(rootlessRuntime, "containers")
if err := os.MkdirAll(opts.RunRoot, 0o700); err != nil {
return opts, fmt.Errorf("unable to make rootless runtime: %w", err)
}
opts.PullOptions = systemOpts.PullOptions
if systemOpts.RootlessStoragePath != "" {
opts.GraphRoot, err = expandEnvPath(systemOpts.RootlessStoragePath, rootlessUID)
@ -343,12 +369,6 @@ func getRootlessStorageOpts(rootlessUID int, systemOpts StoreOptions) (StoreOpti
return opts, nil
}
// DefaultStoreOptionsAutoDetectUID returns the default storage ops for containers
func DefaultStoreOptionsAutoDetectUID() (StoreOptions, error) {
uid := getRootlessUID()
return DefaultStoreOptions(uid != 0, uid)
}
var prevReloadConfig = struct {
storeOptions *StoreOptions
mod time.Time
@ -518,8 +538,8 @@ func Options() (StoreOptions, error) {
}
// Save overwrites the tomlConfig in storage.conf with the given conf
func Save(conf TomlConfig, rootless bool) error {
configFile, err := DefaultConfigFile(rootless)
func Save(conf TomlConfig) error {
configFile, err := DefaultConfigFile()
if err != nil {
return err
}
@ -537,10 +557,10 @@ func Save(conf TomlConfig, rootless bool) error {
}
// StorageConfig is used to retrieve the storage.conf toml in order to overwrite it
func StorageConfig(rootless bool) (*TomlConfig, error) {
func StorageConfig() (*TomlConfig, error) {
config := new(TomlConfig)
configFile, err := DefaultConfigFile(rootless)
configFile, err := DefaultConfigFile()
if err != nil {
return nil, err
}

View file

@ -2,162 +2,15 @@ package types
import (
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/containers/storage/pkg/homedir"
"github.com/containers/storage/pkg/system"
"github.com/sirupsen/logrus"
)
// GetRootlessRuntimeDir returns the runtime directory when running as non root
func GetRootlessRuntimeDir(rootlessUID int) (string, error) {
path, err := getRootlessRuntimeDir(rootlessUID)
if err != nil {
return "", err
}
path = filepath.Join(path, "containers")
if err := os.MkdirAll(path, 0o700); err != nil {
return "", fmt.Errorf("unable to make rootless runtime: %w", err)
}
return path, nil
}
type rootlessRuntimeDirEnvironment interface {
getProcCommandFile() string
getRunUserDir() string
getTmpPerUserDir() string
homeDirGetRuntimeDir() (string, error)
systemLstat(string) (*system.StatT, error)
homedirGet() string
}
type rootlessRuntimeDirEnvironmentImplementation struct {
procCommandFile string
runUserDir string
tmpPerUserDir string
}
func (env rootlessRuntimeDirEnvironmentImplementation) getProcCommandFile() string {
return env.procCommandFile
}
func (env rootlessRuntimeDirEnvironmentImplementation) getRunUserDir() string {
return env.runUserDir
}
func (env rootlessRuntimeDirEnvironmentImplementation) getTmpPerUserDir() string {
return env.tmpPerUserDir
}
func (rootlessRuntimeDirEnvironmentImplementation) homeDirGetRuntimeDir() (string, error) {
return homedir.GetRuntimeDir()
}
func (rootlessRuntimeDirEnvironmentImplementation) systemLstat(path string) (*system.StatT, error) {
return system.Lstat(path)
}
func (rootlessRuntimeDirEnvironmentImplementation) homedirGet() string {
return homedir.Get()
}
func isRootlessRuntimeDirOwner(dir string, env rootlessRuntimeDirEnvironment) bool {
st, err := env.systemLstat(dir)
return err == nil && int(st.UID()) == os.Getuid() && st.Mode()&0o700 == 0o700 && st.Mode()&0o066 == 0o000
}
// getRootlessRuntimeDirIsolated is an internal implementation detail of getRootlessRuntimeDir to allow testing.
// Everyone but the tests this is intended for should only call getRootlessRuntimeDir, never this function.
func getRootlessRuntimeDirIsolated(env rootlessRuntimeDirEnvironment) (string, error) {
runtimeDir, err := env.homeDirGetRuntimeDir()
if err == nil {
return runtimeDir, nil
}
initCommand, err := os.ReadFile(env.getProcCommandFile())
if err != nil || string(initCommand) == "systemd" {
runUserDir := env.getRunUserDir()
if isRootlessRuntimeDirOwner(runUserDir, env) {
return runUserDir, nil
}
}
tmpPerUserDir := env.getTmpPerUserDir()
if tmpPerUserDir != "" {
if _, err := env.systemLstat(tmpPerUserDir); os.IsNotExist(err) {
if err := os.Mkdir(tmpPerUserDir, 0o700); err != nil {
logrus.Errorf("Failed to create temp directory for user: %v", err)
} else {
return tmpPerUserDir, nil
}
} else if isRootlessRuntimeDirOwner(tmpPerUserDir, env) {
return tmpPerUserDir, nil
}
}
homeDir := env.homedirGet()
if homeDir == "" {
return "", errors.New("neither XDG_RUNTIME_DIR nor temp dir nor HOME was set non-empty")
}
resolvedHomeDir, err := filepath.EvalSymlinks(homeDir)
if err != nil {
return "", err
}
return filepath.Join(resolvedHomeDir, "rundir"), nil
}
func getRootlessRuntimeDir(rootlessUID int) (string, error) {
return getRootlessRuntimeDirIsolated(
rootlessRuntimeDirEnvironmentImplementation{
"/proc/1/comm",
fmt.Sprintf("/run/user/%d", rootlessUID),
fmt.Sprintf("%s/containers-user-%d", os.TempDir(), rootlessUID),
},
)
}
// getRootlessDirInfo returns the parent path of where the storage for containers and
// volumes will be in rootless mode
func getRootlessDirInfo(rootlessUID int) (string, string, error) {
rootlessRuntime, err := GetRootlessRuntimeDir(rootlessUID)
if err != nil {
return "", "", err
}
dataDir, err := homedir.GetDataHome()
if err == nil {
return dataDir, rootlessRuntime, nil
}
home := homedir.Get()
if home == "" {
return "", "", fmt.Errorf("neither XDG_DATA_HOME nor HOME was set non-empty: %w", err)
}
// runc doesn't like symlinks in the rootfs path, and at least
// on CoreOS /home is a symlink to /var/home, so resolve any symlink.
resolvedHome, err := filepath.EvalSymlinks(home)
if err != nil {
return "", "", err
}
dataDir = filepath.Join(resolvedHome, ".local", "share")
return dataDir, rootlessRuntime, nil
}
func getRootlessUID() int {
uidEnv := os.Getenv("_CONTAINERS_ROOTLESS_UID")
if uidEnv != "" {
u, _ := strconv.Atoi(uidEnv)
return u
}
return os.Geteuid()
}
func expandEnvPath(path string, rootlessUID int) (string, error) {
var err error
path = strings.Replace(path, "$UID", strconv.Itoa(rootlessUID), -1)
@ -169,7 +22,7 @@ func expandEnvPath(path string, rootlessUID int) (string, error) {
return newpath, nil
}
func DefaultConfigFile(rootless bool) (string, error) {
func DefaultConfigFile() (string, error) {
if defaultConfigFileSet {
return defaultConfigFile, nil
}
@ -177,7 +30,7 @@ func DefaultConfigFile(rootless bool) (string, error) {
if path, ok := os.LookupEnv(storageConfEnv); ok {
return path, nil
}
if !rootless {
if !usePerUserStorage() {
if _, err := os.Stat(defaultOverrideConfigFile); err == nil {
return defaultOverrideConfigFile, nil
}

View file

@ -11,7 +11,7 @@ import (
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/unshare"
"github.com/containers/storage/types"
libcontainerUser "github.com/opencontainers/runc/libcontainer/user"
libcontainerUser "github.com/moby/sys/user"
"github.com/sirupsen/logrus"
)
@ -175,7 +175,7 @@ outer:
// We need to create a temporary layer so we can mount it and lookup the
// maximum IDs used.
clayer, _, err := rlstore.create("", topLayer, nil, "", nil, layerOptions, false, nil)
clayer, _, err := rlstore.create("", topLayer, nil, "", nil, layerOptions, false, nil, nil)
if err != nil {
return 0, err
}

View file

@ -11,19 +11,9 @@ func ParseIDMapping(UIDMapSlice, GIDMapSlice []string, subUIDMap, subGIDMap stri
return types.ParseIDMapping(UIDMapSlice, GIDMapSlice, subUIDMap, subGIDMap)
}
// GetRootlessRuntimeDir returns the runtime directory when running as non root
func GetRootlessRuntimeDir(rootlessUID int) (string, error) {
return types.GetRootlessRuntimeDir(rootlessUID)
}
// DefaultStoreOptionsAutoDetectUID returns the default storage options for containers
func DefaultStoreOptionsAutoDetectUID() (types.StoreOptions, error) {
return types.DefaultStoreOptionsAutoDetectUID()
}
// DefaultStoreOptions returns the default storage options for containers
func DefaultStoreOptions(rootless bool, rootlessUID int) (types.StoreOptions, error) {
return types.DefaultStoreOptions(rootless, rootlessUID)
func DefaultStoreOptions() (types.StoreOptions, error) {
return types.DefaultStoreOptions()
}
func validateMountOptions(mountOptions []string) error {