Don't Panic in getComposeStatus and skip invalid jobs in fsjobqueue New

This handles corrupt job JSON files by skipping them. The corrupt files
are left in place and errors are logged, but the system keeps working.

If one or more of the JSON files in /var/lib/osbuild-composer/jobs/
become corrupt, they can stop the osbuild-composer service from
starting, or stop commands like 'composer-cli compose status' from
working, because the code quits on the first error and misses any jobs
that aren't broken.
This commit is contained in:
Brian C. Lane 2023-11-06 13:21:18 -08:00 committed by Ondřej Budai
parent e969a9dc3c
commit aca748bc14
6 changed files with 291 additions and 18 deletions

View file

@ -364,7 +364,7 @@ func composeStateFromJobStatus(js *worker.JobStatus, result *worker.OSBuildJobRe
// Returns the state of the image in `compose` and the times the job was
// queued, started, and finished. Assumes that there's only one image in the
// compose.
func (api *API) getComposeStatus(compose store.Compose) *composeStatus {
func (api *API) getComposeStatus(compose store.Compose) (*composeStatus, error) {
jobId := compose.ImageBuild.JobID
// backwards compatibility: composes that were around before splitting
@ -388,14 +388,14 @@ func (api *API) getComposeStatus(compose store.Compose) *composeStatus {
Started: compose.ImageBuild.JobStarted,
Finished: compose.ImageBuild.JobFinished,
Result: &osbuild.Result{},
}
}, nil
}
// All jobs are "osbuild" jobs.
var result worker.OSBuildJobResult
jobInfo, err := api.workers.OSBuildJobInfo(jobId, &result)
if err != nil {
panic(err)
return nil, err
}
return &composeStatus{
@ -404,7 +404,7 @@ func (api *API) getComposeStatus(compose store.Compose) *composeStatus {
Started: jobInfo.JobStatus.Started,
Finished: jobInfo.JobStatus.Finished,
Result: result.OSBuildOutput,
}
}, nil
}
// Opens the image file for `compose`. This asks the worker server for the
@ -2659,7 +2659,14 @@ func (api *API) composeDeleteHandler(writer http.ResponseWriter, request *http.R
continue
}
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
errors = append(errors, composeDeleteError{
"ComposeStatusError",
fmt.Sprintf("Error getting status of compose %s: %s", id, err),
})
continue
}
if composeStatus.State != ComposeFinished && composeStatus.State != ComposeFailed {
errors = append(errors, composeDeleteError{
"BuildInWrongState",
@ -2724,7 +2731,15 @@ func (api *API) composeCancelHandler(writer http.ResponseWriter, request *http.R
return
}
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
errors := responseError{
ID: "ComposeStatusError",
Msg: fmt.Sprintf("Error getting status of compose %s: %s", id, err),
}
statusResponseError(writer, http.StatusInternalServerError, errors)
return
}
if composeStatus.State != ComposeWaiting && composeStatus.State != ComposeRunning {
errors := responseError{
ID: "BuildInWrongState",
@ -2828,7 +2843,11 @@ func (api *API) composeQueueHandler(writer http.ResponseWriter, request *http.Re
composes := api.store.GetAllComposes()
for id, compose := range composes {
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
log.Printf("Error getting status of compose %s: %s", id, err)
continue
}
switch composeStatus.State {
case ComposeWaiting:
reply.New = append(reply.New, composeToComposeEntry(id, compose, composeStatus, includeUploads))
@ -2899,7 +2918,12 @@ func (api *API) composeStatusHandler(writer http.ResponseWriter, request *http.R
if !exists {
continue
}
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
log.Printf("Error getting status of compose %s: %s", id, err)
continue
}
if filterBlueprint != "" && compose.Blueprint.Name != filterBlueprint {
continue
} else if filterStatus != "" && composeStatus.State.ToString() != filterStatus {
@ -2914,7 +2938,12 @@ func (api *API) composeStatusHandler(writer http.ResponseWriter, request *http.R
includeUploads := isRequestVersionAtLeast(params, 1)
for _, id := range filteredUUIDs {
if compose, exists := composes[id]; exists {
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
log.Printf("Error getting status of compose %s: %s", id, err)
continue
}
reply.UUIDs = append(reply.UUIDs, composeToComposeEntry(id, compose, composeStatus, includeUploads))
}
}
@ -2969,7 +2998,16 @@ func (api *API) composeInfoHandler(writer http.ResponseWriter, request *http.Req
reply.Blueprint = compose.Blueprint
// Weldr API assumes only one image build per compose, that's why only the
// 1st build is considered
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
errors := responseError{
ID: "ComposeStatusError",
Msg: fmt.Sprintf("Error getting status of compose %s: %s", id, err),
}
statusResponseError(writer, http.StatusInternalServerError, errors)
return
}
reply.ComposeType = compose.ImageBuild.ImageType.Name()
reply.QueueStatus = composeStatus.State.ToString()
reply.ImageSize = compose.ImageBuild.Size
@ -3016,7 +3054,15 @@ func (api *API) composeImageHandler(writer http.ResponseWriter, request *http.Re
return
}
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
errors := responseError{
ID: "ComposeStatusError",
Msg: fmt.Sprintf("Error getting status of compose %s: %s", uuid, err),
}
statusResponseError(writer, http.StatusInternalServerError, errors)
return
}
if composeStatus.State != ComposeFinished {
errors := responseError{
ID: "BuildInWrongState",
@ -3074,7 +3120,15 @@ func (api *API) composeMetadataHandler(writer http.ResponseWriter, request *http
return
}
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
errors := responseError{
ID: "ComposeStatusError",
Msg: fmt.Sprintf("Error getting status of compose %s: %s", uuid, err),
}
statusResponseError(writer, http.StatusInternalServerError, errors)
return
}
if composeStatus.State != ComposeFinished && composeStatus.State != ComposeFailed {
errors := responseError{
ID: "BuildInWrongState",
@ -3135,7 +3189,15 @@ func (api *API) composeResultsHandler(writer http.ResponseWriter, request *http.
return
}
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
errors := responseError{
ID: "ComposeStatusError",
Msg: fmt.Sprintf("Error getting status of compose %s: %s", uuid, err),
}
statusResponseError(writer, http.StatusInternalServerError, errors)
return
}
if composeStatus.State != ComposeFinished && composeStatus.State != ComposeFailed {
errors := responseError{
ID: "BuildInWrongState",
@ -3226,7 +3288,15 @@ func (api *API) composeLogsHandler(writer http.ResponseWriter, request *http.Req
return
}
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
errors := responseError{
ID: "ComposeStatusError",
Msg: fmt.Sprintf("Error getting status of compose %s: %s", id, err),
}
statusResponseError(writer, http.StatusInternalServerError, errors)
return
}
if composeStatus.State != ComposeFinished && composeStatus.State != ComposeFailed {
errors := responseError{
ID: "BuildInWrongState",
@ -3290,7 +3360,15 @@ func (api *API) composeLogHandler(writer http.ResponseWriter, request *http.Requ
return
}
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
errors := responseError{
ID: "ComposeStatusError",
Msg: fmt.Sprintf("Error getting status of compose %s: %s", id, err),
}
statusResponseError(writer, http.StatusInternalServerError, errors)
return
}
if composeStatus.State == ComposeWaiting {
errors := responseError{
ID: "BuildInWrongState",
@ -3320,7 +3398,11 @@ func (api *API) composeFinishedHandler(writer http.ResponseWriter, request *http
includeUploads := isRequestVersionAtLeast(params, 1)
for id, compose := range api.store.GetAllComposes() {
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
log.Printf("Error getting status of compose %s: %s", id, err)
continue
}
if composeStatus.State != ComposeFinished {
continue
}
@ -3343,7 +3425,11 @@ func (api *API) composeFailedHandler(writer http.ResponseWriter, request *http.R
includeUploads := isRequestVersionAtLeast(params, 1)
for id, compose := range api.store.GetAllComposes() {
composeStatus := api.getComposeStatus(compose)
composeStatus, err := api.getComposeStatus(compose)
if err != nil {
log.Printf("Error getting status of compose %s: %s", id, err)
continue
}
if composeStatus.State != ComposeFailed {
continue
}

View file

@ -1373,6 +1373,8 @@ func TestComposeStatus(t *testing.T) {
{rpmmd_mock.BaseFixture, "GET", "/api/v0/compose/status/*?status=FINISHED", ``, http.StatusOK, fmt.Sprintf(`{"uuids":[{"id":"30000000-0000-0000-0000-000000000002","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"FINISHED","job_created":1574857140,"job_started":1574857140,"job_finished":1574857140},{"id":"30000000-0000-0000-0000-000000000004","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"FINISHED","job_created":1574857140,"job_started":1574857140,"job_finished":1574857140}]}`, test_distro.TestImageTypeName)},
{rpmmd_mock.BaseFixture, "GET", fmt.Sprintf("/api/v0/compose/status/*?type=%s", test_distro.TestImageTypeName), ``, http.StatusOK, fmt.Sprintf(`{"uuids":[{"id":"30000000-0000-0000-0000-000000000000","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"WAITING","job_created":1574857140},{"id":"30000000-0000-0000-0000-000000000001","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"RUNNING","job_created":1574857140,"job_started":1574857140},{"id":"30000000-0000-0000-0000-000000000002","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"FINISHED","job_created":1574857140,"job_started":1574857140,"job_finished":1574857140},{"id":"30000000-0000-0000-0000-000000000003","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"FAILED","job_created":1574857140,"job_started":1574857140,"job_finished":1574857140},{"id":"30000000-0000-0000-0000-000000000004","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"FINISHED","job_created":1574857140,"job_started":1574857140,"job_finished":1574857140}]}`, test_distro.TestImageTypeName)},
{rpmmd_mock.BaseFixture, "GET", "/api/v1/compose/status/30000000-0000-0000-0000-000000000000", ``, http.StatusOK, fmt.Sprintf(`{"uuids":[{"id":"30000000-0000-0000-0000-000000000000","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"WAITING","job_created":1574857140,"uploads":[{"uuid":"10000000-0000-0000-0000-000000000000","status":"WAITING","provider_name":"aws","image_name":"awsimage","creation_time":1574857140,"settings":{"region":"frankfurt","bucket":"clay","key":"imagekey"}}]}]}`, test_distro.TestImageTypeName)},
{rpmmd_mock.BadJobJSONFixture, "GET", "/api/v0/compose/status/*", ``, http.StatusOK, fmt.Sprintf(`{"uuids":[{"id":"30000000-0000-0000-0000-000000000000","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"WAITING","job_created":1574857140},{"id":"30000000-0000-0000-0000-000000000001","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"RUNNING","job_created":1574857140,"job_started":1574857140},{"id":"30000000-0000-0000-0000-000000000002","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"FINISHED","job_created":1574857140,"job_started":1574857140,"job_finished":1574857140},{"id":"30000000-0000-0000-0000-000000000003","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"FAILED","job_created":1574857140,"job_started":1574857140,"job_finished":1574857140},{"id":"30000000-0000-0000-0000-000000000004","blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"FINISHED","job_created":1574857140,"job_started":1574857140,"job_finished":1574857140}]}`, test_distro.TestImageTypeName)},
{rpmmd_mock.BadJobJSONFixture, "GET", "/api/v0/compose/status/30000000-0000-0000-0000-000000000005", ``, http.StatusOK, `{"uuids":[]}`},
}
if len(os.Getenv("OSBUILD_COMPOSER_TEST_EXTERNAL")) > 0 {
@ -1524,6 +1526,7 @@ func TestComposeQueue(t *testing.T) {
{rpmmd_mock.BaseFixture, "GET", "/api/v0/compose/queue", ``, http.StatusOK, fmt.Sprintf(`{"new":[{"blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"WAITING"}],"run":[{"blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"RUNNING"}]}`, test_distro.TestImageTypeName)},
{rpmmd_mock.BaseFixture, "GET", "/api/v1/compose/queue", ``, http.StatusOK, fmt.Sprintf(`{"new":[{"blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"WAITING","uploads":[{"uuid":"10000000-0000-0000-0000-000000000000","status":"WAITING","provider_name":"aws","image_name":"awsimage","creation_time":1574857140,"settings":{"region":"frankfurt","bucket":"clay","key":"imagekey"}}]}],"run":[{"blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"RUNNING"}]}`, test_distro.TestImageTypeName)},
{rpmmd_mock.NoComposesFixture, "GET", "/api/v0/compose/queue", ``, http.StatusOK, `{"new":[],"run":[]}`},
{rpmmd_mock.BadJobJSONFixture, "GET", "/api/v0/compose/queue", ``, http.StatusOK, fmt.Sprintf(`{"new":[{"blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"WAITING"}],"run":[{"blueprint":"test","version":"0.0.0","compose_type":"%[1]s","image_size":0,"queue_status":"RUNNING"}]}`, test_distro.TestImageTypeName)},
}
if len(os.Getenv("OSBUILD_COMPOSER_TEST_EXTERNAL")) > 0 {