upload/koji: add a retrying mechanism for CGImport
CGImport quite often fails with the following error: Fault(1000): File size 735051776 for Fedora-IoT-38.raw.xz (expected 738785372) doesn't match. Corrupted upload? When I inspect the file manually, everything seems fine, though. I believe that this is because of NFS inconsistency when multiple DNS-balanced kojihubs are used in the setup (which is what Fedora uses). The added loop implements a retrying mechanism for the CGImport call to try again whenever we see this issue. Note that this isn't caught by the other HTTP retrying mechanism because a failed XMLRPC call returns code 200. Signed-off-by: Ondřej Budai <ondrej@budai.cz>
This commit is contained in:
parent
ce5e41f980
commit
fdc4f54be8
1 changed files with 28 additions and 10 deletions
|
|
@ -3,10 +3,6 @@ package koji
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"net"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
// koji uses MD5 hashes
|
// koji uses MD5 hashes
|
||||||
/* #nosec G501 */
|
/* #nosec G501 */
|
||||||
"crypto/md5"
|
"crypto/md5"
|
||||||
|
|
@ -17,15 +13,18 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/adler32"
|
"hash/adler32"
|
||||||
"io"
|
"io"
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
rh "github.com/hashicorp/go-retryablehttp"
|
||||||
"github.com/kolo/xmlrpc"
|
"github.com/kolo/xmlrpc"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/ubccr/kerby/khttp"
|
"github.com/ubccr/kerby/khttp"
|
||||||
|
|
||||||
rh "github.com/hashicorp/go-retryablehttp"
|
|
||||||
"github.com/osbuild/osbuild-composer/internal/rpmmd"
|
"github.com/osbuild/osbuild-composer/internal/rpmmd"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -284,13 +283,32 @@ func (k *Koji) CGImport(build ImageBuild, buildRoots []BuildRoot, images []Image
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var result CGImportResult
|
const retryCount = 10
|
||||||
err = k.xmlrpc.Call("CGImport", []interface{}{string(metadata), directory, token}, &result)
|
const retryDelay = time.Second
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
for attempt := 0; attempt < retryCount; attempt += 1 {
|
||||||
|
var result CGImportResult
|
||||||
|
err = k.xmlrpc.Call("CGImport", []interface{}{string(metadata), directory, token}, &result)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
// Retry when the error mentions a corrupted upload. It's usually
|
||||||
|
// just because of NFS inconsistency when the kojihub has multiple
|
||||||
|
// replicas.
|
||||||
|
if strings.Contains(err.Error(), "Corrupted upload") {
|
||||||
|
time.Sleep(retryDelay)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fail immediately on other errors, they are probably legitimate
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
logrus.Infof("CGImport succeeded after %d attempts", attempt+1)
|
||||||
|
|
||||||
|
return &result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return &result, nil
|
return nil, fmt.Errorf("failed to import a build after %d attempts: %w", retryCount, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// uploadChunk uploads a byte slice to a given filepath/filename at a given offset
|
// uploadChunk uploads a byte slice to a given filepath/filename at a given offset
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue