From 6a23a12fd03d177054e1fc15fe96064c7fd04e18 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 6 Dec 2022 16:34:25 +0100 Subject: [PATCH 01/92] wip --- cli/koji_cli/commands.py | 56 +++++++++++++++++++++++++++++++ koji/__init__.py | 2 ++ kojihub/kojihub.py | 72 +++++++++++++++++++++++++++++++++++++--- kojihub/kojixmlrpc.py | 3 ++ schemas/schema.sql | 22 ++++++++++++ 5 files changed, 150 insertions(+), 5 deletions(-) diff --git a/cli/koji_cli/commands.py b/cli/koji_cli/commands.py index a906b269..770083bb 100644 --- a/cli/koji_cli/commands.py +++ b/cli/koji_cli/commands.py @@ -7899,3 +7899,59 @@ def anon_handle_repoinfo(goptions, session, args): # repoID option added in 1.33 if options.buildroots: warn("--buildroots option is available with hub 1.33 or newer") + + +def anon_handle_scheduler_info(goptions, session, args): + """[monitor] Show information about scheduling""" + usage = "usage: %prog schedulerinfo [options]" + parser = OptionParser(usage=get_usage_str(usage)) + parser.add_option("-t", "--task", action="store", type=int, default=None, + help="Limit data to given task id") + parser.add_option("--host", action="store", default=None, + help="Limit data to given builder (name/id)") + parser.add_option("--state", action="store", type='str', default=None, + choices=[x for x in koji.TASK_STATES.keys()], + help="Limit data to task state") + (options, args) = parser.parse_args(args) + if len(args) > 0: + parser.error("This command takes no arguments") + + ensure_connection(session, goptions) + + host_id = None + if options.host: + try: + host_id = int(options.host) + except ValueError: + host_id = session.getHost(options.host, strict=True)['id'] + + if options.state: + state = koji.TASK_STATES[options.state] + else: + state = None + + # get the data + runs = session.scheduler.getTaskRuns(taskID=options.task, hostID=host_id, state=state) + mask = '%(task_id)s\t%(host_id)s\t%(state)s\t%(create_time)s\t%(start_time)s\t%(end_time)s' + if not goptions.quiet: + header = mask % { + 'task_id': 'Task', + 'host_name': 'Host', + 'state': 'State', + 'create_time': 'Created', + 'start_time': 'Started', + 'end_time': 'Ended' + } + print(header) + print('-' * len(header)) + for run in runs: + run['state'] = koji.TASK_STATES[runs['state']] + print(mask % run) + + if host_id: + print('Host data for %s:' % options.host) + host_data = session.scheduler.getHostData(hostID=host_id) + if len(host_data) > 0: + print(host_data[0]['data']) + else: + print('-') diff --git a/koji/__init__.py b/koji/__init__.py index 4d0e7b25..6f1aa837 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -194,6 +194,8 @@ TASK_STATES = Enum(( 'CANCELED', 'ASSIGNED', 'FAILED', + 'SCHEDULED', + 'REFUSED', )) BUILD_STATES = Enum(( diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index cc266f3a..b4b37588 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -95,6 +95,7 @@ from .db import ( # noqa: F401 logger = logging.getLogger('koji.hub') +sched_logger = scheduler.DBLogger() NUMERIC_TYPES = (int, float) @@ -315,10 +316,12 @@ class Task(object): else: return None - def free(self): + def free(self, newstate=koji.TASK_STATES['FREE']): """Free a task""" + if newstate not in [koji.TASK_STATES['FREE'], koji.TASK_STATES['REFUSED']]: + raise koji.GenericError("Can't be called with other than FREE/REFUSED states") info = self.getInfo(request=True) - self.runCallbacks('preTaskStateChange', info, 'state', koji.TASK_STATES['FREE']) + self.runCallbacks('preTaskStateChange', info, 'state', newstate) self.runCallbacks('preTaskStateChange', info, 'host_id', None) # access checks should be performed by calling function query = QueryProcessor(tables=['task'], columns=['state'], clauses=['id = %(id)i'], @@ -327,14 +330,13 @@ class Task(object): if not oldstate: raise koji.GenericError("No such task: %i" % self.id) if koji.TASK_STATES[oldstate] in ['CLOSED', 'CANCELED', 'FAILED']: - raise koji.GenericError("Cannot free task %i, state is %s" % + raise koji.GenericError("Cannot free/refuse task %i, state is %s" % (self.id, koji.TASK_STATES[oldstate])) - newstate = koji.TASK_STATES['FREE'] newhost = None update = UpdateProcessor('task', clauses=['id=%(task_id)s'], values={'task_id': self.id}, data={'state': newstate, 'host_id': newhost}) update.execute() - self.runCallbacks('postTaskStateChange', info, 'state', koji.TASK_STATES['FREE']) + self.runCallbacks('postTaskStateChange', info, 'state', newstate) self.runCallbacks('postTaskStateChange', info, 'host_id', None) return True @@ -14409,6 +14411,66 @@ class HostExports(object): task.assertHost(host.id) return task.setWeight(weight) + def setHostData(self, hostdata): + """Builder will update all its resources + + Initial implementation contains: + - available task methods + - maxjobs + - host readiness + """ + host = Host() + host.verify() + clauses = ['host_id = %(host_id)i'] + values = {'host_id': host.id} + table = 'scheduler_host_data' + query = QueryProcessor(tables=[table], clauses=clauses, values=values, + opts={'countOnly': True}) + if query.singleValue() > 0: + update = UpdateProcessor(table=table, data={'data': hostdata}, + clauses=clauses, values=values) + update.execute() + else: + insert = InsertProcessor(table=table, data={'data': hostdata}, + clauses=clauses, values=values) + insert.execute() + sched_logger.debug(f"Updating host data with: {hostdata}", + host_id=host.id, location='setHostData') + + def getTasks(self): + host = Host() + host.verify() + + query = QueryProcessor( + tables=['scheduler_task_runs'], + clauses=[ + 'host_id = %(host_id)s', + 'state in %(states)s' + ], + values={ + 'host_id': host.id, + 'states': [ + koji.TASK_STATES['SCHEDULED'], + koji.TASK_STATES['ASSIGNED'], + ], + } + ) + tasks = query.execute() + for task in tasks: + sched_logger.debug("Sending task", host_id=host.id, task_id=task['id'], + location="getTasks") + return tasks + + def refuseTask(self, task_id): + host = Host() + host.verify() + + task = Task(task_id) + task.free(newstate=koji.TASK_STATES['REFUSED']) + sched_logger.warning("Refusing task", host_id=host.id, task_id=task_id, + location="refuseTask") + return True + def getHostTasks(self): host = Host() host.verify() diff --git a/kojihub/kojixmlrpc.py b/kojihub/kojixmlrpc.py index e9e0b3d9..88f2f806 100644 --- a/kojihub/kojixmlrpc.py +++ b/kojihub/kojixmlrpc.py @@ -711,6 +711,7 @@ def setup_logging2(opts): log_handler.setFormatter(HubFormatter(opts['LogFormat'])) + import scheduler def get_memory_usage(): pagesize = resource.getpagesize() statm = [pagesize * int(y) // 1024 @@ -844,8 +845,10 @@ def get_registry(opts, plugins): registry = HandlerRegistry() functions = kojihub.RootExports() hostFunctions = kojihub.HostExports() + schedulerFunctions = scheduler.SchedulerExports() registry.register_instance(functions) registry.register_module(hostFunctions, "host") + registry.register_module(schedulerFunctions, "scheduler") registry.register_function(auth.login) registry.register_function(auth.sslLogin) registry.register_function(auth.logout) diff --git a/schemas/schema.sql b/schemas/schema.sql index 5fa6a22c..cfe39326 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -983,6 +983,28 @@ CREATE TABLE rpm_checksum ( ) WITHOUT OIDS; CREATE INDEX rpm_checksum_rpm_id ON rpm_checksum(rpm_id); + +-- scheduler tables +CREATE TABLE scheduler_task_runs ( + id SERIAL NOT NULL PRIMARY KEY, + task_id INTEGER REFERENCES task (id) NOT NULL, + host_id INTEGER REFERENCES host (id) NOT NULL, + state INTEGER NOT NULL, + create_time TIMESTAMPTZ NOT NULL DEFAULT NOW(), + start_time TIMESTAMPTZ, + end_time TIMESTAMPTZ, +) WITHOUT OIDS; +CREATE INDEX scheduler_task_runs_task ON scheduler_task_runs(task_id); +CREATE INDEX scheduler_task_runs_host ON scheduler_task_runs(host_id); +CREATE INDEX scheduler_task_runs_state ON scheduler_task_runs(state); +CREATE INDEX scheduler_task_runs_create_time ON scheduler_task_runs(create_time); + +CREATE TABLE scheduler_host_data ( + host_id INTEGER REFERENCES host (id) PRIMARY KEY, + data JSONB, +) WITHOUT OIDS; + + -- this table is used for locking, see db_lock() CREATE TABLE locks ( name TEXT NOT NULL PRIMARY KEY From 4241098f4669d4232d57aca023d2aa7ca08891db Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Wed, 7 Dec 2022 15:09:32 +0100 Subject: [PATCH 02/92] replace old calls --- kojihub/kojihub.py | 72 ++-------------------------------------------- 1 file changed, 3 insertions(+), 69 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index b4b37588..01124801 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -2536,44 +2536,6 @@ def set_channel_enabled(channelname, enabled=True, comment=None): update.execute() -def get_ready_hosts(): - """Return information about hosts that are ready to build. - - Hosts set the ready flag themselves - Note: We ignore hosts that are late checking in (even if a host - is busy with tasks, it should be checking in quite often). - """ - query = QueryProcessor( - tables=['host'], - columns=['host.id', 'name', 'arches', 'task_load', 'capacity'], - aliases=['id', 'name', 'arches', 'task_load', 'capacity'], - clauses=[ - 'enabled IS TRUE', - 'ready IS TRUE', - 'expired IS FALSE', - 'master IS NULL', - 'active IS TRUE', - "sessions.update_time > NOW() - '5 minutes'::interval" - ], - joins=[ - 'sessions USING (user_id)', - 'host_config ON host.id = host_config.host_id' - ] - ) - hosts = query.execute() - for host in hosts: - query = QueryProcessor( - tables=['host_channels'], - columns=['channel_id'], - clauses=['host_id=%(id)s', 'active IS TRUE', 'enabled IS TRUE'], - joins=['channels ON host_channels.channel_id = channels.id'], - values=host - ) - rows = query.execute() - host['channels'] = [row['channel_id'] for row in rows] - return hosts - - def get_all_arches(): """Return a list of all (canonical) arches available from hosts""" ret = {} @@ -2589,27 +2551,6 @@ def get_all_arches(): return list(ret.keys()) -def get_active_tasks(host=None): - """Return data on tasks that are yet to be run""" - fields = ['id', 'state', 'channel_id', 'host_id', 'arch', 'method', 'priority', 'create_time'] - values = dslice(koji.TASK_STATES, ('FREE', 'ASSIGNED')) - if host: - values['arches'] = host['arches'].split() + ['noarch'] - values['channels'] = host['channels'] - values['host_id'] = host['id'] - clause = '(state = %(ASSIGNED)i AND host_id = %(host_id)i)' - if values['channels']: - clause += ''' OR (state = %(FREE)i AND arch IN %(arches)s \ -AND channel_id IN %(channels)s)''' - clauses = [clause] - else: - clauses = ['state IN (%(FREE)i,%(ASSIGNED)i)'] - queryOpts = {'limit': 100, 'order': 'priority,create_time'} - query = QueryProcessor(columns=fields, tables=['task'], clauses=clauses, - values=values, opts=queryOpts) - return query.execute() - - def get_task_descendents(task, childMap=None, request=False): if childMap is None: childMap = {} @@ -14328,16 +14269,9 @@ class Host(object): This data is relatively small and the necessary load analysis is relatively complex, so we let the host machines crunch it.""" - hosts = get_ready_hosts() - for host in hosts: - if host['id'] == self.id: - break - else: - # this host not in ready list - return [[], []] - # host is the host making the call - tasks = get_active_tasks(host) - return [hosts, tasks] + host = get_host(self.id) + tasks = scheduler.getTaskRuns(hostID=self.id) + return [[host], tasks] def isEnabled(self): """Return whether this host is enabled or not.""" From 159eed548c1df5a1f3e73b8303e6e1362deb163a Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sat, 4 Mar 2023 10:27:37 -0500 Subject: [PATCH 03/92] partial: pulling work from other branch --- schemas/schema.sql | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/schemas/schema.sql b/schemas/schema.sql index cfe39326..007e75e3 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1005,6 +1005,18 @@ CREATE TABLE scheduler_host_data ( ) WITHOUT OIDS; +CREATE TABLE scheduler_log_messages ( + id SERIAL NOT NULL PRIMARY KEY, + task_id INTEGER REFERENCES task (id), + host_id INTEGER REFERENCES host (id), + msg_time TIMESTAMPTZ NOT NULL DEFAULT NOW(), + logger_name VARCHAR(200) NOT NULL, + level VARCHAR(10) NOT NULL, + location VARCHAR(200), + msg TEXT NOT NULL +) WITHOUT OIDS; + + -- this table is used for locking, see db_lock() CREATE TABLE locks ( name TEXT NOT NULL PRIMARY KEY From c59ea1de2bf976d1308845e85b80a89f5483ad4e Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Tue, 7 Mar 2023 08:26:35 -0500 Subject: [PATCH 04/92] ... --- schemas/schema.sql | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/schemas/schema.sql b/schemas/schema.sql index 007e75e3..e778c21f 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -999,12 +999,23 @@ CREATE INDEX scheduler_task_runs_host ON scheduler_task_runs(host_id); CREATE INDEX scheduler_task_runs_state ON scheduler_task_runs(state); CREATE INDEX scheduler_task_runs_create_time ON scheduler_task_runs(create_time); + CREATE TABLE scheduler_host_data ( host_id INTEGER REFERENCES host (id) PRIMARY KEY, data JSONB, ) WITHOUT OIDS; +CREATE TABLE scheduler_map ( + id SERIAL NOT NULL PRIMARY KEY, + task_id INTEGER REFERENCES task (id) NOT NULL, + host_id INTEGER REFERENCES host (id) NOT NULL, + runnable BOOLEAN NOT NULL, + refused BOOLEAN NOT NULL, + check_time TIMESTAMPTZ NOT NULL DEFAULT NOW() +) WITHOUT OIDS; + + CREATE TABLE scheduler_log_messages ( id SERIAL NOT NULL PRIMARY KEY, task_id INTEGER REFERENCES task (id), From b379db83763ff304482b13ae3a3708ff6b9960d0 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 17 Mar 2023 15:26:44 -0400 Subject: [PATCH 05/92] stub to let hub run --- kojihub/kojihub.py | 1 + kojihub/kojixmlrpc.py | 2 +- kojihub/scheduler.py | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 kojihub/scheduler.py diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 01124801..6e19e238 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -74,6 +74,7 @@ from koji.util import ( multi_fnmatch, safer_move, ) +from . import scheduler from .auth import get_user_perms, get_user_groups from .db import ( # noqa: F401 BulkInsertProcessor, diff --git a/kojihub/kojixmlrpc.py b/kojihub/kojixmlrpc.py index 88f2f806..f9000ca2 100644 --- a/kojihub/kojixmlrpc.py +++ b/kojihub/kojixmlrpc.py @@ -41,6 +41,7 @@ from koji.server import ServerError, BadRequest, RequestTimeout from koji.xmlrpcplus import ExtendedMarshaller, Fault, dumps, getparser from . import auth from . import db +from . import scheduler class Marshaller(ExtendedMarshaller): @@ -711,7 +712,6 @@ def setup_logging2(opts): log_handler.setFormatter(HubFormatter(opts['LogFormat'])) - import scheduler def get_memory_usage(): pagesize = resource.getpagesize() statm = [pagesize * int(y) // 1024 diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py new file mode 100644 index 00000000..7491021a --- /dev/null +++ b/kojihub/scheduler.py @@ -0,0 +1,9 @@ +# scheduler code goes here + + +class DBLogger: + pass + + +class SchedulerExports: + pass From 95b2a3c7e73325b99582aa6f14fdbdf3c81182b4 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 17 Mar 2023 16:23:07 -0400 Subject: [PATCH 06/92] basic getTaskRuns function --- kojihub/scheduler.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 7491021a..57bf08b3 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -1,5 +1,8 @@ # scheduler code goes here +from .kojihub import convert_value +from .db import QueryProcessor + class DBLogger: pass @@ -7,3 +10,43 @@ class DBLogger: class SchedulerExports: pass + + +def intlist(value): + """Cast value to a list of ints""" + if isinstance(value, (list, tuple)): + return [int(n) for n in value] + else: + return [int(value)] + + +def getTaskRuns(taskID=None, hostID=None, state=None): + taskID = convert_value(taskID, cast=int, none_allowed=True) + hostID = convert_value(hostID, cast=int, none_allowed=True) + state = convert_value(state, cast=intlist, none_allowed=True) + + fields = ( + ('scheduler_task_runs.id', 'id'), + ('task_id', 'task_id'), + ('host_id', 'host_id'), + ('host.name', 'host_name'), + ("date_part('epoch', create_time)", 'create_ts'), + ("date_part('epoch', start_time)", 'start_ts'), + ("date_part('epoch', edn_time)", 'end_ts'), + ) + fields, aliases = zip(*fields) + + clauses = [] + if taskID is not None: + clauses.append('task_id = %(taskID)s') + if hostID is not None: + clauses.append('host_id = %(hostID)s') + if state is not None: + clauses.append('host_id IN %(state)s') + + query = QueryProcessor( + columns=fields, aliases=aliases, tables=['scheduler_task_runs'], + joins=['LEFT OUTER JOIN host on host_id=host.id'], + clauses=clauses, values=locals()) + + return query.execute() From 6308e8f0ada6b9d69251bb514a02cf2abba3d64b Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 17 Mar 2023 16:31:02 -0400 Subject: [PATCH 07/92] move convert_value to util lib --- kojihub/kojihub.py | 31 +------------------------------ kojihub/scheduler.py | 2 +- kojihub/util.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 31 deletions(-) create mode 100644 kojihub/util.py diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 6e19e238..262a67de 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -93,6 +93,7 @@ from .db import ( # noqa: F401 nextval, currval, ) +from .util import convert_value logger = logging.getLogger('koji.hub') @@ -111,36 +112,6 @@ def xform_user_krb(entry): return entry -def convert_value(value, cast=None, message=None, - exc_type=koji.ParameterError, none_allowed=False, check_only=False): - """Cast to another type with tailored exception - - :param any value: tested object - :param type cast: To which type value should be cast - :param type exc_type: Raise this exception - :param bool none_allowed: Is None valid value? - :param check_only: Don't convert but raise an exception if type(value) != cast - - :returns any value: returns converted value - """ - if value is None: - if not none_allowed: - raise exc_type(message or f"Invalid type, expected type {cast}") - else: - return value - if check_only: - if not isinstance(value, cast): - raise exc_type(message or f"Invalid type for value '{value}': {type(value)}, " - f"expected type {cast}") - else: - try: - value = cast(value) - except (ValueError, TypeError): - raise exc_type(message or f"Invalid type for value '{value}': {type(value)}, " - f"expected type {cast}") - return value - - class Task(object): """A task for the build hosts""" diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 57bf08b3..12a7495a 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -1,7 +1,7 @@ # scheduler code goes here -from .kojihub import convert_value from .db import QueryProcessor +from .util import convert_value class DBLogger: diff --git a/kojihub/util.py b/kojihub/util.py new file mode 100644 index 00000000..a874676d --- /dev/null +++ b/kojihub/util.py @@ -0,0 +1,33 @@ +import koji + + +def convert_value(value, cast=None, message=None, + exc_type=koji.ParameterError, none_allowed=False, check_only=False): + """Cast to another type with tailored exception + + :param any value: tested object + :param type cast: To which type value should be cast + :param type exc_type: Raise this exception + :param bool none_allowed: Is None valid value? + :param check_only: Don't convert but raise an exception if type(value) != cast + + :returns any value: returns converted value + """ + if value is None: + if not none_allowed: + raise exc_type(message or f"Invalid type, expected type {cast}") + else: + return value + if check_only: + if not isinstance(value, cast): + raise exc_type(message or f"Invalid type for value '{value}': {type(value)}, " + f"expected type {cast}") + else: + try: + value = cast(value) + except (ValueError, TypeError): + raise exc_type(message or f"Invalid type for value '{value}': {type(value)}, " + f"expected type {cast}") + return value + + From 46af52c6080bd3d530868c6e566eefd1ba58e45b Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 17 Mar 2023 16:37:25 -0400 Subject: [PATCH 08/92] typo --- kojihub/scheduler.py | 2 +- schemas/schema.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 12a7495a..10f96d35 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -32,7 +32,7 @@ def getTaskRuns(taskID=None, hostID=None, state=None): ('host.name', 'host_name'), ("date_part('epoch', create_time)", 'create_ts'), ("date_part('epoch', start_time)", 'start_ts'), - ("date_part('epoch', edn_time)", 'end_ts'), + ("date_part('epoch', end_time)", 'end_ts'), ) fields, aliases = zip(*fields) diff --git a/schemas/schema.sql b/schemas/schema.sql index e778c21f..2a555e7d 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -992,7 +992,7 @@ CREATE TABLE scheduler_task_runs ( state INTEGER NOT NULL, create_time TIMESTAMPTZ NOT NULL DEFAULT NOW(), start_time TIMESTAMPTZ, - end_time TIMESTAMPTZ, + end_time TIMESTAMPTZ ) WITHOUT OIDS; CREATE INDEX scheduler_task_runs_task ON scheduler_task_runs(task_id); CREATE INDEX scheduler_task_runs_host ON scheduler_task_runs(host_id); @@ -1002,7 +1002,7 @@ CREATE INDEX scheduler_task_runs_create_time ON scheduler_task_runs(create_time) CREATE TABLE scheduler_host_data ( host_id INTEGER REFERENCES host (id) PRIMARY KEY, - data JSONB, + data JSONB ) WITHOUT OIDS; From 4b2074feca57d42f18022f7eae4174b81ebdb65e Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 17 Mar 2023 17:37:19 -0400 Subject: [PATCH 09/92] return channels in getLoadData --- kojihub/kojihub.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 262a67de..79ad3fce 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -14242,6 +14242,7 @@ class Host(object): This data is relatively small and the necessary load analysis is relatively complex, so we let the host machines crunch it.""" host = get_host(self.id) + host['channels'] = [c['id'] for c in list_channels(hostID=self.id)] tasks = scheduler.getTaskRuns(hostID=self.id) return [[host], tasks] From 2ecb9b268472b478ba49fe1e69656e4ffe62fb7b Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 17 Mar 2023 17:58:34 -0400 Subject: [PATCH 10/92] present task runs as assigned tasks to hosts using old api --- kojihub/kojihub.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 79ad3fce..ae7032bd 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -14244,6 +14244,11 @@ class Host(object): host = get_host(self.id) host['channels'] = [c['id'] for c in list_channels(hostID=self.id)] tasks = scheduler.getTaskRuns(hostID=self.id) + # for builders using this old api, we fake some of this data to get them to take the + # task runs assigned to them + for task in tasks: + task['state'] = koji.TASK_STATES['ASSIGNED'] + task['host_id'] = self.id return [[host], tasks] def isEnabled(self): From 3ba25fc96be4f6781968d88dfb4a768bf5e6ff5c Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 17 Mar 2023 20:22:37 -0400 Subject: [PATCH 11/92] revert changes to freeTask --- kojihub/kojihub.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index ae7032bd..d7a6b031 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -288,12 +288,10 @@ class Task(object): else: return None - def free(self, newstate=koji.TASK_STATES['FREE']): + def free(self): """Free a task""" - if newstate not in [koji.TASK_STATES['FREE'], koji.TASK_STATES['REFUSED']]: - raise koji.GenericError("Can't be called with other than FREE/REFUSED states") info = self.getInfo(request=True) - self.runCallbacks('preTaskStateChange', info, 'state', newstate) + self.runCallbacks('preTaskStateChange', info, 'state', koji.TASK_STATES['FREE']) self.runCallbacks('preTaskStateChange', info, 'host_id', None) # access checks should be performed by calling function query = QueryProcessor(tables=['task'], columns=['state'], clauses=['id = %(id)i'], @@ -302,13 +300,14 @@ class Task(object): if not oldstate: raise koji.GenericError("No such task: %i" % self.id) if koji.TASK_STATES[oldstate] in ['CLOSED', 'CANCELED', 'FAILED']: - raise koji.GenericError("Cannot free/refuse task %i, state is %s" % + raise koji.GenericError("Cannot free task %i, state is %s" % (self.id, koji.TASK_STATES[oldstate])) + newstate = koji.TASK_STATES['FREE'] newhost = None update = UpdateProcessor('task', clauses=['id=%(task_id)s'], values={'task_id': self.id}, data={'state': newstate, 'host_id': newhost}) update.execute() - self.runCallbacks('postTaskStateChange', info, 'state', newstate) + self.runCallbacks('postTaskStateChange', info, 'state', koji.TASK_STATES['FREE']) self.runCallbacks('postTaskStateChange', info, 'host_id', None) return True @@ -14377,6 +14376,8 @@ class HostExports(object): host = Host() host.verify() + # XXX + task = Task(task_id) task.free(newstate=koji.TASK_STATES['REFUSED']) sched_logger.warning("Refusing task", host_id=host.id, task_id=task_id, From 5ad9640b9ef5e2565b532b283b909c80944a4123 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 17 Mar 2023 20:40:39 -0400 Subject: [PATCH 12/92] pull scheduler stub code from old branch --- kojihub/scheduler.py | 77 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 10f96d35..f635eeaa 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -50,3 +50,80 @@ def getTaskRuns(taskID=None, hostID=None, state=None): clauses=clauses, values=locals()) return query.execute() + + +def scheduler_map_task(taskinfo): + # map which hosts can take this task + # eventually this will involve more complex rules + q = QueryProcessor() + # select hosts matching arch and channel + hosts = q.execute() + u = InsertProcessor() + + +class TaskScheduler(object): + + def run(self): + if not self.get_lock(): + # already running elsewhere + return + + # get runs + fields = [ + ('id', 'id'), + ('task_id', 'task_id'), + ('host_id', 'host_id'), + ('state', 'state'), + ("date_part('epoch', create_time)", 'create_ts'), + ("date_part('epoch', start_time)", 'start_ts'), + ("date_part('epoch', end_time)", 'end_ts')] + columns, aliases = zip(*fields.items()) + query = QueryProcessor(columns = columns, aliases=aliases, tables=['scheduler_runs']) + runs = query.execute() + runs_by_task = {} + for run in runs: + runs_by_task.setdefault(run['task_id'], []) + runs_by_task[run['task_id']].append(run) + + # get tasks + active_tasks = get_active_tasks() # FREE and ASSIGNED, limit 100, priority ordered + # TODO need a better query, but this will do for now + + # get hosts and bin them + hosts = get_ready_hosts() + hosts_by_bin = {} + for host in hosts: + host['_bins'] = [] + for chan in host['channels']: + for arch in host['arches'].split() + ['noarch']: + host_bin = "%s:%s" % (chan, arch) + hosts_by_bin.setdefault(host_bin, []).append(host) + host['_bins'].append(host_bin) + + for task in active_tasks: + if task['state'] == koji.TASK_STATES['ASSIGNED']: + # TODO -- sort out our interaction with old school assignments + continue + have_run = False + task_runs = runs_by_task.get(task['id'], []) + for run in task_runs: + if run['state'] in OK_RUN_STATES: + have_run = True + break + if have_run: + continue + elif task_runs: + # TODO -- what to do about bad runs? + else: + # we need a run + # XXX need host + self.add_run(task, host) + + def add_run(task, host): + insert = InsertProcessor('scheduler_runs') + insert.set(task_id=task['id'], host_id=host['id'], state=1) + insert.execute() + + def get_lock(self): + # TODO + pass From bcab8f7c1e756bc52826bb36f0e37e22f38bba4d Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sat, 18 Mar 2023 02:26:48 -0400 Subject: [PATCH 13/92] tinkering --- kojihub/kojihub.py | 5 ++++ kojihub/scheduler.py | 54 +++++++++++++++++++++++++++++++------------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index d7a6b031..1c5f29d4 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -14243,6 +14243,11 @@ class Host(object): host = get_host(self.id) host['channels'] = [c['id'] for c in list_channels(hostID=self.id)] tasks = scheduler.getTaskRuns(hostID=self.id) + if not tasks: + # try running scheduler + if scheduler.TaskScheduler().run(): + # check again + tasks = scheduler.getTaskRuns(hostID=self.id) # for builders using this old api, we fake some of this data to get them to take the # task runs assigned to them for task in tasks: diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index f635eeaa..787409bc 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -30,6 +30,7 @@ def getTaskRuns(taskID=None, hostID=None, state=None): ('task_id', 'task_id'), ('host_id', 'host_id'), ('host.name', 'host_name'), + ('state', 'state'), ("date_part('epoch', create_time)", 'create_ts'), ("date_part('epoch', start_time)", 'start_ts'), ("date_part('epoch', end_time)", 'end_ts'), @@ -49,7 +50,7 @@ def getTaskRuns(taskID=None, hostID=None, state=None): joins=['LEFT OUTER JOIN host on host_id=host.id'], clauses=clauses, values=locals()) - return query.execute() + data = query.execute() def scheduler_map_task(taskinfo): @@ -66,27 +67,16 @@ class TaskScheduler(object): def run(self): if not self.get_lock(): # already running elsewhere - return + return False - # get runs - fields = [ - ('id', 'id'), - ('task_id', 'task_id'), - ('host_id', 'host_id'), - ('state', 'state'), - ("date_part('epoch', create_time)", 'create_ts'), - ("date_part('epoch', start_time)", 'start_ts'), - ("date_part('epoch', end_time)", 'end_ts')] - columns, aliases = zip(*fields.items()) - query = QueryProcessor(columns = columns, aliases=aliases, tables=['scheduler_runs']) - runs = query.execute() + runs = getTaskRuns() runs_by_task = {} for run in runs: runs_by_task.setdefault(run['task_id'], []) runs_by_task[run['task_id']].append(run) # get tasks - active_tasks = get_active_tasks() # FREE and ASSIGNED, limit 100, priority ordered + active_tasks = self.get_tasks() # TODO need a better query, but this will do for now # get hosts and bin them @@ -119,7 +109,39 @@ class TaskScheduler(object): # XXX need host self.add_run(task, host) - def add_run(task, host): + # indicate that scheduling ran + return True + + def get_tasks(self): + pass + + + def get_task_data(): + joins = ('LEFT OUTER JOIN scheduler_task_runs ON task_id = task.id') + + fields = ( + ('task.id', 'task_id'), + ('scheduler_task_runs.id', 'id'), + ('task_id', 'task_id'), + ('host_id', 'host_id'), + ('host.name', 'host_name'), + ('state', 'state'), + ("date_part('epoch', create_time)", 'create_ts'), + ("date_part('epoch', start_time)", 'start_ts'), + ("date_part('epoch', end_time)", 'end_ts'), + ) + fields, aliases = zip(*fields) + + + query = QueryProcessor( + columns=fields, aliases=aliases, tables=['scheduler_task_runs'], + joins=['LEFT OUTER JOIN host on host_id=host.id'], + clauses=clauses, values=locals()) + + data = query.execute() + + + def add_run(self, task, host): insert = InsertProcessor('scheduler_runs') insert.set(task_id=task['id'], host_id=host['id'], state=1) insert.execute() From 555418454ab010cc03993ed406fe8a7fd0bfefd1 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sat, 18 Mar 2023 02:44:27 -0400 Subject: [PATCH 14/92] ... --- kojihub/scheduler.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 787409bc..3bef6efa 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -75,9 +75,8 @@ class TaskScheduler(object): runs_by_task.setdefault(run['task_id'], []) runs_by_task[run['task_id']].append(run) - # get tasks - active_tasks = self.get_tasks() - # TODO need a better query, but this will do for now + # get tasks to schedule + tasks = self.get_tasks() # get hosts and bin them hosts = get_ready_hosts() @@ -113,33 +112,33 @@ class TaskScheduler(object): return True def get_tasks(self): - pass - - - def get_task_data(): - joins = ('LEFT OUTER JOIN scheduler_task_runs ON task_id = task.id') + """Get the tasks that need scheduling""" fields = ( ('task.id', 'task_id'), - ('scheduler_task_runs.id', 'id'), - ('task_id', 'task_id'), - ('host_id', 'host_id'), - ('host.name', 'host_name'), - ('state', 'state'), - ("date_part('epoch', create_time)", 'create_ts'), - ("date_part('epoch', start_time)", 'start_ts'), - ("date_part('epoch', end_time)", 'end_ts'), + ('task.state', 'state'), + ('channel_id', 'channel_id'), + ('task.host_id', 'host_id'), + ('arch', 'arch'), + ('method', 'method'), + ('priority', 'priority'), + ("date_part('epoch', task.create_time)", 'create_ts'), + ('scheduler_task_runs.id', 'run_id'), ) fields, aliases = zip(*fields) + values = {'states': [koji.TASK_STATES[n] for n in ('FREE', 'ASSIGNED')]} query = QueryProcessor( - columns=fields, aliases=aliases, tables=['scheduler_task_runs'], - joins=['LEFT OUTER JOIN host on host_id=host.id'], - clauses=clauses, values=locals()) - - data = query.execute() + columns=fields, aliases=aliases, tables=['task'], + joins=('LEFT OUTER JOIN scheduler_task_runs ON task_id = task.id'), + clauses=('task.state IN %(states)s', 'run_id IS NULL'), + # XXX these clauses are not enough + values=values, + opts={'order': 'priority,create_time'} + ) + return query.execute() def add_run(self, task, host): insert = InsertProcessor('scheduler_runs') From 13fdf5470932551fbe89d8ef8ddc6e79d6284b88 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Mon, 20 Mar 2023 13:17:18 -0400 Subject: [PATCH 15/92] typo --- kojihub/scheduler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 3bef6efa..c0959074 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -103,6 +103,7 @@ class TaskScheduler(object): continue elif task_runs: # TODO -- what to do about bad runs? + pass else: # we need a run # XXX need host From cc33d2b1244586feced27fa2d26fa5c26e202fba Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Mon, 20 Mar 2023 22:31:12 -0400 Subject: [PATCH 16/92] more work --- kojihub/scheduler.py | 103 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 90 insertions(+), 13 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index c0959074..8d29d729 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -76,10 +76,15 @@ class TaskScheduler(object): runs_by_task[run['task_id']].append(run) # get tasks to schedule - tasks = self.get_tasks() + tasks = self.get_free_tasks() + tasks_by_bin = {} + for task in tasks: + tbin = '%(channel_id)s:%(arch)s' % task + task['_bin'] = tbin + tasks_by_bin.setdefault(tbin, []).append(task) # get hosts and bin them - hosts = get_ready_hosts() + hosts = self.get_ready_hosts() hosts_by_bin = {} for host in hosts: host['_bins'] = [] @@ -89,10 +94,34 @@ class TaskScheduler(object): hosts_by_bin.setdefault(host_bin, []).append(host) host['_bins'].append(host_bin) - for task in active_tasks: - if task['state'] == koji.TASK_STATES['ASSIGNED']: - # TODO -- sort out our interaction with old school assignments - continue + # order bins by available host capacity + order = [] + for _bin in hosts_by_bin: + hosts = hosts_by_bin[_bin] + avail = sum([min(0, h['capacity'] - h['task_load']) for h in hosts]) + order.append((avail, _bin)) + order.sort() + + # note bin demand for each host + for n, (avail, _bin) in enumerate(order): + rank = float(n) / len(order) + for host in hosts_by_bin[_bin]: + host.setdefault('_rank', rank) + # so host rank is set by the most contentious bin it covers + # TODO - we could be smarter here, but it's a start + + # sort binned hosts by rank + for _bin in hosts_by_bin: + hosts = hosts_by_bin[_bin] + hosts.sort(key=lambda h: h._rank, reverse=True) + # hosts with least contention first + + # tasks are already in priority order + for task in tasks: + hosts = hosts_by_bin[task['_bin']] + # these are the hosts that _can_ take this task + # TODO - update host ranks as we go + have_run = False task_runs = runs_by_task.get(task['id'], []) for run in task_runs: @@ -112,7 +141,7 @@ class TaskScheduler(object): # indicate that scheduling ran return True - def get_tasks(self): + def get_free_tasks(self): """Get the tasks that need scheduling""" fields = ( @@ -124,23 +153,71 @@ class TaskScheduler(object): ('method', 'method'), ('priority', 'priority'), ("date_part('epoch', task.create_time)", 'create_ts'), - ('scheduler_task_runs.id', 'run_id'), + # ('scheduler_task_runs.id', 'run_id'), ) fields, aliases = zip(*fields) - values = {'states': [koji.TASK_STATES[n] for n in ('FREE', 'ASSIGNED')]} + values = {'states': [koji.TASK_STATES[n] for n in ('FREE',)]} query = QueryProcessor( columns=fields, aliases=aliases, tables=['task'], - joins=('LEFT OUTER JOIN scheduler_task_runs ON task_id = task.id'), - clauses=('task.state IN %(states)s', 'run_id IS NULL'), - # XXX these clauses are not enough + # joins=('LEFT OUTER JOIN scheduler_task_runs ON task_id = task.id'), + # clauses=('task.state IN %(states)s', 'run_id IS NULL'), + clauses=('task.state IN %(states)s',), values=values, - opts={'order': 'priority,create_time'} + opts={'order': 'priority,create_time'}, + # scheduler order + # lower priority numbers take precedence, like posix process priority + # at a given priority, earlier creation times take precedence ) return query.execute() + def get_ready_hosts(): + """Query hosts that are ready to build""" + + fields = ( + ('host.id', 'id'), + ('host.name', 'name'), + ('host.task_load', 'task_load'), + ('host_config.arches', 'arches'), + ('host_config.capacity', 'capacity'), + ("date_part('epoch', sessions.update_time)", 'update_ts'), + ) + fields, aliases = zip(*fields) + + query = QueryProcessor( + tables=['host'], + columns=fields, + aliases=aliases, + clauses=[ + 'enabled IS TRUE', + 'ready IS TRUE', + 'expired IS FALSE', + 'master IS NULL', + 'active IS TRUE', + "update_time > NOW() - '5 minutes'::interval" + ], + joins=[ + 'sessions USING (user_id)', + 'host_config ON host.id = host_config.host_id' + ] + ) + + hosts = query.execute() + for host in hosts: + query = QueryProcessor( + tables=['host_channels'], + columns=['channel_id'], + clauses=['host_id=%(id)s', 'active IS TRUE', 'enabled IS TRUE'], + joins=['channels ON host_channels.channel_id = channels.id'], + values=host + ) + rows = query.execute() + host['channels'] = [row['channel_id'] for row in rows] + + return hosts + def add_run(self, task, host): insert = InsertProcessor('scheduler_runs') insert.set(task_id=task['id'], host_id=host['id'], state=1) From 1bf429a4d91dc7a424df8a4358735bd57d770f7f Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Mon, 27 Mar 2023 13:34:45 -0400 Subject: [PATCH 17/92] ... --- kojihub/scheduler.py | 14 ++++++-------- schemas/schema.sql | 1 + 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 8d29d729..9671ff33 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -64,17 +64,18 @@ def scheduler_map_task(taskinfo): class TaskScheduler(object): + def __init__(self): + self.hosts_by_bin = None + def run(self): if not self.get_lock(): # already running elsewhere return False - runs = getTaskRuns() - runs_by_task = {} - for run in runs: - runs_by_task.setdefault(run['task_id'], []) - runs_by_task[run['task_id']].append(run) + self.do_schedule() + return True + def do_schedule(self): # get tasks to schedule tasks = self.get_free_tasks() tasks_by_bin = {} @@ -138,9 +139,6 @@ class TaskScheduler(object): # XXX need host self.add_run(task, host) - # indicate that scheduling ran - return True - def get_free_tasks(self): """Get the tasks that need scheduling""" diff --git a/schemas/schema.sql b/schemas/schema.sql index 2a555e7d..ee62bc42 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1012,6 +1012,7 @@ CREATE TABLE scheduler_map ( host_id INTEGER REFERENCES host (id) NOT NULL, runnable BOOLEAN NOT NULL, refused BOOLEAN NOT NULL, + msg TEXT, check_time TIMESTAMPTZ NOT NULL DEFAULT NOW() ) WITHOUT OIDS; From d3acd7288d8f0b2bd6c11868a3f1679333f586ea Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 30 Mar 2023 14:17:16 -0400 Subject: [PATCH 18/92] cleanup --- kojihub/scheduler.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 9671ff33..9759bb44 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -1,5 +1,6 @@ # scheduler code goes here +import koji from .db import QueryProcessor from .util import convert_value @@ -98,7 +99,7 @@ class TaskScheduler(object): # order bins by available host capacity order = [] for _bin in hosts_by_bin: - hosts = hosts_by_bin[_bin] + hosts = hosts_by_bin.get(_bin, []) avail = sum([min(0, h['capacity'] - h['task_load']) for h in hosts]) order.append((avail, _bin)) order.sort() @@ -106,7 +107,7 @@ class TaskScheduler(object): # note bin demand for each host for n, (avail, _bin) in enumerate(order): rank = float(n) / len(order) - for host in hosts_by_bin[_bin]: + for host in hosts_by_bin.get(_bin, []): host.setdefault('_rank', rank) # so host rank is set by the most contentious bin it covers # TODO - we could be smarter here, but it's a start @@ -119,25 +120,11 @@ class TaskScheduler(object): # tasks are already in priority order for task in tasks: - hosts = hosts_by_bin[task['_bin']] + hosts = hosts_by_bin.get(task['_bin'], []) # these are the hosts that _can_ take this task # TODO - update host ranks as we go + # TODO - pick a host and assign - have_run = False - task_runs = runs_by_task.get(task['id'], []) - for run in task_runs: - if run['state'] in OK_RUN_STATES: - have_run = True - break - if have_run: - continue - elif task_runs: - # TODO -- what to do about bad runs? - pass - else: - # we need a run - # XXX need host - self.add_run(task, host) def get_free_tasks(self): """Get the tasks that need scheduling""" @@ -163,7 +150,7 @@ class TaskScheduler(object): # clauses=('task.state IN %(states)s', 'run_id IS NULL'), clauses=('task.state IN %(states)s',), values=values, - opts={'order': 'priority,create_time'}, + opts={'order': 'priority,create_ts'}, # scheduler order # lower priority numbers take precedence, like posix process priority # at a given priority, earlier creation times take precedence @@ -171,7 +158,7 @@ class TaskScheduler(object): return query.execute() - def get_ready_hosts(): + def get_ready_hosts(self): """Query hosts that are ready to build""" fields = ( @@ -223,4 +210,4 @@ class TaskScheduler(object): def get_lock(self): # TODO - pass + return True # XXX From c763efd625fdc60f2fb7be45952e2d5901c42b9c Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 30 Mar 2023 14:57:49 -0400 Subject: [PATCH 19/92] table lock --- kojihub/scheduler.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 9759bb44..f3665932 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -1,8 +1,9 @@ -# scheduler code goes here +import psycopg2 import koji from .db import QueryProcessor from .util import convert_value +from koji.context import context class DBLogger: @@ -209,5 +210,11 @@ class TaskScheduler(object): insert.execute() def get_lock(self): - # TODO - return True # XXX + c = context.cnx.cursor() + try: + c.execute('LOCK TABLE scheduler_map IN EXCLUSIVE MODE NOWAIT', log_errors=False) + # This allows parallel reads, but nothing else + # Note that even though we don't log the errors, postgres itself might + except psycopg2.OperationalError: + return False + return True From ca4fe038fe6757f9536d6dc49be0f44d7ae88673 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 31 Mar 2023 12:58:05 -0400 Subject: [PATCH 20/92] XXX TEST --- kojihub/kojihub.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 1c5f29d4..bce543ed 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -10017,6 +10017,9 @@ def _delete_event_id(): class RootExports(object): '''Contains functions that are made available via XMLRPC''' + def TEST(self): + scheduler.TaskScheduler().run() + def restartHosts(self, priority=5, options=None): """Spawns restartHosts task From 44eda4ce28973187106384f254c7fa759f1f2a89 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 31 Mar 2023 23:15:58 -0400 Subject: [PATCH 21/92] use db_lock in scheduler --- kojihub/scheduler.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index f3665932..fd9300ca 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -1,7 +1,7 @@ import psycopg2 import koji -from .db import QueryProcessor +from .db import QueryProcessor, db_lock from .util import convert_value from koji.context import context @@ -70,7 +70,7 @@ class TaskScheduler(object): self.hosts_by_bin = None def run(self): - if not self.get_lock(): + if not db_lock('scheduler', wait=False): # already running elsewhere return False @@ -208,13 +208,3 @@ class TaskScheduler(object): insert = InsertProcessor('scheduler_runs') insert.set(task_id=task['id'], host_id=host['id'], state=1) insert.execute() - - def get_lock(self): - c = context.cnx.cursor() - try: - c.execute('LOCK TABLE scheduler_map IN EXCLUSIVE MODE NOWAIT', log_errors=False) - # This allows parallel reads, but nothing else - # Note that even though we don't log the errors, postgres itself might - except psycopg2.OperationalError: - return False - return True From d9cf62dcf351ae5e109a254464be104a4dcd1541 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 31 Mar 2023 23:53:16 -0400 Subject: [PATCH 22/92] tweak clause order --- kojihub/scheduler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index fd9300ca..df172ca1 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -177,12 +177,12 @@ class TaskScheduler(object): columns=fields, aliases=aliases, clauses=[ - 'enabled IS TRUE', - 'ready IS TRUE', - 'expired IS FALSE', - 'master IS NULL', - 'active IS TRUE', - "update_time > NOW() - '5 minutes'::interval" + 'host.ready IS TRUE', + 'host_config.enabled IS TRUE', + 'host_config.active IS TRUE', + 'sessions.expired IS FALSE', + 'sessions.master IS NULL', + "sessions.update_time > NOW() - '5 minutes'::interval" ], joins=[ 'sessions USING (user_id)', From 5c38457662bb82bc23fa6202b0cd378ccf695e87 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sat, 1 Apr 2023 01:02:14 -0400 Subject: [PATCH 23/92] update scheduler runs as tasks are updated --- kojihub/kojihub.py | 21 +++++++++++++++++++++ kojihub/scheduler.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index bce543ed..e26b5d04 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -259,6 +259,16 @@ class Task(object): if state == koji.TASK_STATES['OPEN']: update.rawset(start_time='NOW()') update.execute() + if state == koji.TASK_STATES['OPEN']: + # also update run entry + update2 = UpdateProcessor( + 'scheduler_runs', + clauses=['task_id=%(task_id)s', 'host_id=%(host_id)s'], + values=locals(), + data={'state': state}, + rawdata={'start_time': 'NOW()'}, + ) + update2.execute() self.runCallbacks('postTaskStateChange', info, 'state', koji.TASK_STATES[newstate]) self.runCallbacks('postTaskStateChange', info, 'host_id', host_id) return True @@ -360,6 +370,17 @@ class Task(object): data={'result': info['result'], 'state': state}, rawdata={'completion_time': 'NOW()'}) update.execute() + + # also update run entry + update2 = UpdateProcessor( + 'scheduler_runs', + clauses=['task_id=%(task_id)s', 'host_id=%(host_id)s'], + values=locals(), + data={'state': state}, + rawdata={'end_time': 'NOW()'}, + ) + update2.execute() + self.runCallbacks('postTaskStateChange', info, 'state', state) self.runCallbacks('postTaskStateChange', info, 'completion_ts', now) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index df172ca1..988603a9 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -206,5 +206,5 @@ class TaskScheduler(object): def add_run(self, task, host): insert = InsertProcessor('scheduler_runs') - insert.set(task_id=task['id'], host_id=host['id'], state=1) + insert.set(task_id=task['id'], host_id=host['id'], state=koji.TASK_STATES['ASSIGNED']) insert.execute() From 0d274b797005e145a552eb44f6689d58503c5c9d Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Tue, 4 Apr 2023 15:42:39 -0400 Subject: [PATCH 24/92] ... --- kojihub/scheduler.py | 94 +++++++++++++++++++++++++++++--------------- schemas/schema.sql | 7 ++++ 2 files changed, 70 insertions(+), 31 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 988603a9..d997aa9b 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -68,6 +68,9 @@ class TaskScheduler(object): def __init__(self): self.hosts_by_bin = None + self.tasks_by_bin = None + self.active_tasks = None + self.maxjobs = 15 # XXX def run(self): if not db_lock('scheduler', wait=False): @@ -75,32 +78,26 @@ class TaskScheduler(object): return False self.do_schedule() + # TODO clean up bad data (e.g. active tasks with no host) + # TODO check for runs that aren't getting picked up + return True - def do_schedule(self): - # get tasks to schedule - tasks = self.get_free_tasks() - tasks_by_bin = {} - for task in tasks: - tbin = '%(channel_id)s:%(arch)s' % task - task['_bin'] = tbin - tasks_by_bin.setdefault(tbin, []).append(task) + def get_runs(self): + runs = getTaskRuns() + runs_by_task = {} + for run in runs: + runs_by_task.setdefault(run['task_id'], []) + runs_by_task[run['task_id']].append(run) - # get hosts and bin them - hosts = self.get_ready_hosts() - hosts_by_bin = {} - for host in hosts: - host['_bins'] = [] - for chan in host['channels']: - for arch in host['arches'].split() + ['noarch']: - host_bin = "%s:%s" % (chan, arch) - hosts_by_bin.setdefault(host_bin, []).append(host) - host['_bins'].append(host_bin) + def do_schedule(self): + self.get_tasks() + self.get_hosts() # order bins by available host capacity order = [] - for _bin in hosts_by_bin: - hosts = hosts_by_bin.get(_bin, []) + for _bin in self.hosts_by_bin: + hosts = self.hosts_by_bin.get(_bin, []) avail = sum([min(0, h['capacity'] - h['task_load']) for h in hosts]) order.append((avail, _bin)) order.sort() @@ -114,25 +111,26 @@ class TaskScheduler(object): # TODO - we could be smarter here, but it's a start # sort binned hosts by rank - for _bin in hosts_by_bin: - hosts = hosts_by_bin[_bin] + for _bin in self.hosts_by_bin: + hosts = self.hosts_by_bin[_bin] hosts.sort(key=lambda h: h._rank, reverse=True) # hosts with least contention first # tasks are already in priority order for task in tasks: - hosts = hosts_by_bin.get(task['_bin'], []) + hosts = self.hosts_by_bin.get(task['_bin'], []) # these are the hosts that _can_ take this task # TODO - update host ranks as we go # TODO - pick a host and assign - def get_free_tasks(self): - """Get the tasks that need scheduling""" + def get_tasks(self): + """Get the task data that we need for scheduling""" fields = ( ('task.id', 'task_id'), ('task.state', 'state'), + ('task.waiting', 'waiting'), ('channel_id', 'channel_id'), ('task.host_id', 'host_id'), ('arch', 'arch'), @@ -143,21 +141,55 @@ class TaskScheduler(object): ) fields, aliases = zip(*fields) - values = {'states': [koji.TASK_STATES[n] for n in ('FREE',)]} + values = {'states': [koji.TASK_STATES[n] for n in ('ASSIGNED', 'OPEN')]} query = QueryProcessor( columns=fields, aliases=aliases, tables=['task'], - # joins=('LEFT OUTER JOIN scheduler_task_runs ON task_id = task.id'), - # clauses=('task.state IN %(states)s', 'run_id IS NULL'), - clauses=('task.state IN %(states)s',), + clauses=('task.state IN %(states)s', + 'task.host_id IS NOT NULL', # should always be set, but... + ), values=values, - opts={'order': 'priority,create_ts'}, + ) + active_tasks = query.execute() + + values = {'state': koji.TASK_STATES['FREE']} + query = QueryProcessor( + columns=fields, aliases=aliases, tables=['task'], + clauses=('task.state = %(state)s',), + values=values, + opts={'order': 'priority,create_ts', 'limit': 1000}, # TODO config # scheduler order # lower priority numbers take precedence, like posix process priority # at a given priority, earlier creation times take precedence ) + free_tasks = query.execute() - return query.execute() + tasks_by_bin = {} + for task in free_tasks: + tbin = '%(channel_id)s:%(arch)s' % task + task['_bin'] = tbin + tasks_by_bin.setdefault(tbin, []).append(task) + + for task in active_tasks: + tbin = '%(channel_id)s:%(arch)s' % task + task['_bin'] = tbin + + self.tasks_by_bin = tasks_by_bin + self.active_tasks = active_tasks + + def get_hosts(self): + # get hosts and bin them + hosts = self.get_ready_hosts() + hosts_by_bin = {} + for host in hosts: + host['_bins'] = [] + for chan in host['channels']: + for arch in host['arches'].split() + ['noarch']: + host_bin = "%s:%s" % (chan, arch) + hosts_by_bin.setdefault(host_bin, []).append(host) + host['_bins'].append(host_bin) + + self.hosts_by_bin = hosts_by_bin def get_ready_hosts(self): """Query hosts that are ready to build""" diff --git a/schemas/schema.sql b/schemas/schema.sql index ee62bc42..f97ac2e0 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1000,6 +1000,13 @@ CREATE INDEX scheduler_task_runs_state ON scheduler_task_runs(state); CREATE INDEX scheduler_task_runs_create_time ON scheduler_task_runs(create_time); +CREATE TABLE scheduler_queue ( + task_id INTEGER REFERENCES task (id) NOT NULL PRIMARY KEY, + groomed BOOLEAN NOT NULL DEFAULT FALSE, + run_id INTEGER REFERENCES scheduler_task_run(id) +) WITHOUT OIDS; + + CREATE TABLE scheduler_host_data ( host_id INTEGER REFERENCES host (id) PRIMARY KEY, data JSONB From dc08bff4d681a6120bb5bf29e81e267a30b3b4c2 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Tue, 4 Apr 2023 22:01:57 -0400 Subject: [PATCH 25/92] ... --- kojihub/scheduler.py | 48 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index d997aa9b..218b2769 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -1,3 +1,4 @@ +import logging import psycopg2 import koji @@ -6,6 +7,10 @@ from .util import convert_value from koji.context import context +logger = logging.getLogger('koji.scheduler') +# TODO set up db logging + + class DBLogger: pass @@ -68,6 +73,7 @@ class TaskScheduler(object): def __init__(self): self.hosts_by_bin = None + self.hosts = None self.tasks_by_bin = None self.active_tasks = None self.maxjobs = 15 # XXX @@ -94,18 +100,41 @@ class TaskScheduler(object): self.get_tasks() self.get_hosts() + # calculate host load and task count + for task in self.active_tasks: + # for now, we mirror what kojid updateTasks has been doing + host = self.hosts.get(task['host_id']) + if not host: + # not showing as ready + # TODO log and deal with this condition + continue + host.setdefault('_load', 0.0) + if not task['waiting']: + host['_load'] += task['weight'] + host.setdefault('_ntasks', 0) + host['_ntasks'] += 1 + + for host in self.hosts.values(): + host.setdefault('_load', 0.0) + # temporary test code + logger.info(f'Host: {host}') + ldiff = host['task_load'] - host['_load'] + if abs(ldiff) > 0.01: + # this is expected in a number of cases, just observing + logger.info(f'Host load differs by {ldiff:.2f}: {host}') + # order bins by available host capacity order = [] for _bin in self.hosts_by_bin: hosts = self.hosts_by_bin.get(_bin, []) - avail = sum([min(0, h['capacity'] - h['task_load']) for h in hosts]) + avail = sum([min(0, h['capacity'] - h['_load']) for h in hosts]) order.append((avail, _bin)) order.sort() # note bin demand for each host for n, (avail, _bin) in enumerate(order): rank = float(n) / len(order) - for host in hosts_by_bin.get(_bin, []): + for host in self.hosts_by_bin.get(_bin, []): host.setdefault('_rank', rank) # so host rank is set by the most contentious bin it covers # TODO - we could be smarter here, but it's a start @@ -113,11 +142,11 @@ class TaskScheduler(object): # sort binned hosts by rank for _bin in self.hosts_by_bin: hosts = self.hosts_by_bin[_bin] - hosts.sort(key=lambda h: h._rank, reverse=True) + hosts.sort(key=lambda h: h['_rank'], reverse=True) # hosts with least contention first # tasks are already in priority order - for task in tasks: + for task in self.free_tasks: hosts = self.hosts_by_bin.get(task['_bin'], []) # these are the hosts that _can_ take this task # TODO - update host ranks as we go @@ -131,6 +160,7 @@ class TaskScheduler(object): ('task.id', 'task_id'), ('task.state', 'state'), ('task.waiting', 'waiting'), + ('task.weight', 'weight'), ('channel_id', 'channel_id'), ('task.host_id', 'host_id'), ('arch', 'arch'), @@ -164,25 +194,24 @@ class TaskScheduler(object): ) free_tasks = query.execute() - tasks_by_bin = {} for task in free_tasks: tbin = '%(channel_id)s:%(arch)s' % task task['_bin'] = tbin - tasks_by_bin.setdefault(tbin, []).append(task) for task in active_tasks: tbin = '%(channel_id)s:%(arch)s' % task task['_bin'] = tbin - self.tasks_by_bin = tasks_by_bin + self.free_tasks = free_tasks self.active_tasks = active_tasks def get_hosts(self): # get hosts and bin them - hosts = self.get_ready_hosts() hosts_by_bin = {} - for host in hosts: + hosts_by_id = {} + for host in self.get_ready_hosts(): host['_bins'] = [] + hosts_by_id[host['id']] = host for chan in host['channels']: for arch in host['arches'].split() + ['noarch']: host_bin = "%s:%s" % (chan, arch) @@ -190,6 +219,7 @@ class TaskScheduler(object): host['_bins'].append(host_bin) self.hosts_by_bin = hosts_by_bin + self.hosts = hosts_by_id def get_ready_hosts(self): """Query hosts that are ready to build""" From 49b66e9ec75204019efa3b1bf9b7e86238303783 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Tue, 4 Apr 2023 22:03:45 -0400 Subject: [PATCH 26/92] ... --- kojihub/scheduler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 218b2769..a29bb0f6 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -58,6 +58,7 @@ def getTaskRuns(taskID=None, hostID=None, state=None): clauses=clauses, values=locals()) data = query.execute() + return data def scheduler_map_task(taskinfo): From ae8607b2618b59ea59ebb9c4c5c282e002533e0b Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Wed, 5 Apr 2023 16:58:31 -0400 Subject: [PATCH 27/92] actually pick a host and add a run --- kojihub/kojihub.py | 1 + kojihub/scheduler.py | 71 +++++++++++++++++++++++++------------------- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index e26b5d04..0453cc4f 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -14275,6 +14275,7 @@ class Host(object): # for builders using this old api, we fake some of this data to get them to take the # task runs assigned to them for task in tasks: + task['id'] = task['task_id'] # builder expects task id not run id task['state'] = koji.TASK_STATES['ASSIGNED'] task['host_id'] = self.id return [[host], tasks] diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index a29bb0f6..6cf0b4ea 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -2,7 +2,7 @@ import logging import psycopg2 import koji -from .db import QueryProcessor, db_lock +from .db import QueryProcessor, InsertProcessor, db_lock from .util import convert_value from koji.context import context @@ -37,6 +37,7 @@ def getTaskRuns(taskID=None, hostID=None, state=None): ('task_id', 'task_id'), ('host_id', 'host_id'), ('host.name', 'host_name'), + ('task.method', 'method'), ('state', 'state'), ("date_part('epoch', create_time)", 'create_ts'), ("date_part('epoch', start_time)", 'start_ts'), @@ -54,7 +55,7 @@ def getTaskRuns(taskID=None, hostID=None, state=None): query = QueryProcessor( columns=fields, aliases=aliases, tables=['scheduler_task_runs'], - joins=['LEFT OUTER JOIN host on host_id=host.id'], + joins=['host ON host_id=host.id', 'task ON task_id=task.id'], clauses=clauses, values=locals()) data = query.execute() @@ -77,7 +78,9 @@ class TaskScheduler(object): self.hosts = None self.tasks_by_bin = None self.active_tasks = None + self.free_tasks = None self.maxjobs = 15 # XXX + self.capacity_overcommit = 5 # TODO config def run(self): if not db_lock('scheduler', wait=False): @@ -101,6 +104,11 @@ class TaskScheduler(object): self.get_tasks() self.get_hosts() + # debug + logger.info(f'Hosts: {len(self.hosts)}') + logger.info(f'Free tasks: {len(self.free_tasks)}') + logger.info(f'Active tasks: {len(self.active_tasks)}') + # calculate host load and task count for task in self.active_tasks: # for now, we mirror what kojid updateTasks has been doing @@ -117,6 +125,7 @@ class TaskScheduler(object): for host in self.hosts.values(): host.setdefault('_load', 0.0) + host.setdefault('_ntasks', 0) # temporary test code logger.info(f'Host: {host}') ldiff = host['task_load'] - host['_load'] @@ -124,35 +133,36 @@ class TaskScheduler(object): # this is expected in a number of cases, just observing logger.info(f'Host load differs by {ldiff:.2f}: {host}') - # order bins by available host capacity - order = [] - for _bin in self.hosts_by_bin: - hosts = self.hosts_by_bin.get(_bin, []) - avail = sum([min(0, h['capacity'] - h['_load']) for h in hosts]) - order.append((avail, _bin)) - order.sort() - - # note bin demand for each host - for n, (avail, _bin) in enumerate(order): - rank = float(n) / len(order) - for host in self.hosts_by_bin.get(_bin, []): - host.setdefault('_rank', rank) - # so host rank is set by the most contentious bin it covers - # TODO - we could be smarter here, but it's a start - - # sort binned hosts by rank - for _bin in self.hosts_by_bin: - hosts = self.hosts_by_bin[_bin] - hosts.sort(key=lambda h: h['_rank'], reverse=True) - # hosts with least contention first + # figure out which hosts *can* take each task + # at the moment this is mostly just bin, but in the future it will be more complex + for task in self.free_tasks: + task['_hosts'] = [] + min_avail = task['weight'] + self.capacity_overcommit + for host in self.hosts_by_bin.get(task['_bin'], []): + if (host['capacity'] > host['_load'] and + host['_ntasks'] < self.maxjobs and + host['capacity'] - host['_load'] > min_avail): + task['_hosts'].append(host) + logger.info(f'Task {task["task_id"]}: {len(task["_hosts"])} options') + for host in task['_hosts']: + # demand gives us a rough measure of how much overall load is pending for the host + host.setdefault('_demand', 0.0) + host['_demand'] += task['weight'] / len(task['_hosts']) # tasks are already in priority order for task in self.free_tasks: - hosts = self.hosts_by_bin.get(task['_bin'], []) - # these are the hosts that _can_ take this task - # TODO - update host ranks as we go - # TODO - pick a host and assign - + # pick the host with least demand + task['_hosts'].sort(key=lambda h: h['_demand']) + min_avail = task['weight'] + self.capacity_overcommit + for host in task['_hosts']: + if (host['capacity'] > host['_load'] and + host['_ntasks'] < self.maxjobs and + host['capacity'] - host['_load'] > min_avail): + # add run entry + self.add_run(task, host) + # update our totals + host['_load'] += task['weight'] + host['_ntasks'] += 1 def get_tasks(self): """Get the task data that we need for scheduling""" @@ -268,6 +278,7 @@ class TaskScheduler(object): return hosts def add_run(self, task, host): - insert = InsertProcessor('scheduler_runs') - insert.set(task_id=task['id'], host_id=host['id'], state=koji.TASK_STATES['ASSIGNED']) + insert = InsertProcessor('scheduler_task_runs') + insert.set(task_id=task['task_id'], host_id=host['id'], state=koji.TASK_STATES['ASSIGNED']) insert.execute() + # TODO actually assign the task entry too From 3280bb4edcd4dd86f9a986ce8d0a1d2ca036a495 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 6 Apr 2023 10:05:28 -0400 Subject: [PATCH 28/92] fragment --- kojihub/scheduler.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 6cf0b4ea..f4f77a68 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -27,6 +27,8 @@ def intlist(value): return [int(value)] +def get_tasks_for_host(hostID): + def getTaskRuns(taskID=None, hostID=None, state=None): taskID = convert_value(taskID, cast=int, none_allowed=True) hostID = convert_value(hostID, cast=int, none_allowed=True) @@ -34,11 +36,11 @@ def getTaskRuns(taskID=None, hostID=None, state=None): fields = ( ('scheduler_task_runs.id', 'id'), - ('task_id', 'task_id'), - ('host_id', 'host_id'), + ('scheduler_task_runs.task_id', 'task_id'), + ('scheduler_task_runs.host_id', 'host_id'), ('host.name', 'host_name'), ('task.method', 'method'), - ('state', 'state'), + ('scheduler_task_runs.state', 'state'), ("date_part('epoch', create_time)", 'create_ts'), ("date_part('epoch', start_time)", 'start_ts'), ("date_part('epoch', end_time)", 'end_ts'), From 8492ac0afe6ac1a0b898bf795edb2d9d86bf2c86 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 6 Apr 2023 15:23:46 -0400 Subject: [PATCH 29/92] update task query for hosts --- kojihub/kojihub.py | 4 ++-- kojihub/scheduler.py | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 0453cc4f..3bb312fb 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -14266,12 +14266,12 @@ class Host(object): relatively complex, so we let the host machines crunch it.""" host = get_host(self.id) host['channels'] = [c['id'] for c in list_channels(hostID=self.id)] - tasks = scheduler.getTaskRuns(hostID=self.id) + tasks = scheduler.get_tasks_for_host(hostID=self.id) if not tasks: # try running scheduler if scheduler.TaskScheduler().run(): # check again - tasks = scheduler.getTaskRuns(hostID=self.id) + tasks = scheduler.get_tasks_for_host(hostID=self.id) # for builders using this old api, we fake some of this data to get them to take the # task runs assigned to them for task in tasks: diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index f4f77a68..01fa0395 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -28,6 +28,29 @@ def intlist(value): def get_tasks_for_host(hostID): + """Get the tasks assigned to a given host""" + hostID = convert_value(hostID, cast=int, none_allowed=True) + + fields = ( + ('task.id', 'id'), + ('task.state', 'state'), + ('task.channel_id', 'channel_id'), + ('task.host_id', 'host_id'), + ('task.arch', 'arch'), + ('task.method', 'method'), + ('task.priority', 'priority'), + ("date_part('epoch', create_time)", 'create_ts'), + ) + fields, aliases = zip(*fields) + + query = QueryProcessor( + columns=fields, aliases=aliases, tables=['task'], + clauses=['host_id = %(hostID)s', 'state=%(assigned)s'], + values={'hostID': hostID, 'assigned': koji.TASK_STATES['ASSIGNED']}, + ) + + return query.execute() + def getTaskRuns(taskID=None, hostID=None, state=None): taskID = convert_value(taskID, cast=int, none_allowed=True) From 883deb570af211479d773802eaeb7cec2361cd31 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 6 Apr 2023 15:23:57 -0400 Subject: [PATCH 30/92] TEST2 --- kojihub/kojihub.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 3bb312fb..de17e107 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -10041,6 +10041,10 @@ class RootExports(object): def TEST(self): scheduler.TaskScheduler().run() + def TEST2(self, hostID=1): + return Host(hostID).getLoadData() + # return scheduler.get_tasks_for_host(hostID) + def restartHosts(self, priority=5, options=None): """Spawns restartHosts task From ad5165723a499069f267b8f806d04366a9e879ab Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 6 Apr 2023 15:36:40 -0400 Subject: [PATCH 31/92] do assignment --- kojihub/kojihub.py | 6 ------ kojihub/scheduler.py | 10 ++++++++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index de17e107..fd3c651b 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -14276,12 +14276,6 @@ class Host(object): if scheduler.TaskScheduler().run(): # check again tasks = scheduler.get_tasks_for_host(hostID=self.id) - # for builders using this old api, we fake some of this data to get them to take the - # task runs assigned to them - for task in tasks: - task['id'] = task['task_id'] # builder expects task id not run id - task['state'] = koji.TASK_STATES['ASSIGNED'] - task['host_id'] = self.id return [[host], tasks] def isEnabled(self): diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 01fa0395..92bd9910 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -2,7 +2,7 @@ import logging import psycopg2 import koji -from .db import QueryProcessor, InsertProcessor, db_lock +from .db import QueryProcessor, InsertProcessor, UpdateProcessor, db_lock from .util import convert_value from koji.context import context @@ -306,4 +306,10 @@ class TaskScheduler(object): insert = InsertProcessor('scheduler_task_runs') insert.set(task_id=task['task_id'], host_id=host['id'], state=koji.TASK_STATES['ASSIGNED']) insert.execute() - # TODO actually assign the task entry too + update = UpdateProcessor( + 'task', + data={'host_id': host['id'], 'state': koji.TASK_STATES['ASSIGNED']}, + clauses=['id=%(task_id)s', 'state=%(free)s'], + values={'task_id': task['task_id'], 'free': koji.TASK_STATES['FREE']}, + ) + update.execute() From 0a3f6460bf5d5c0d6c2f5769bc71ec612c43ffe2 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 6 Apr 2023 15:40:22 -0400 Subject: [PATCH 32/92] typo --- kojihub/kojihub.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index fd3c651b..531294b8 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -262,7 +262,7 @@ class Task(object): if state == koji.TASK_STATES['OPEN']: # also update run entry update2 = UpdateProcessor( - 'scheduler_runs', + 'scheduler_task_runs', clauses=['task_id=%(task_id)s', 'host_id=%(host_id)s'], values=locals(), data={'state': state}, @@ -373,7 +373,7 @@ class Task(object): # also update run entry update2 = UpdateProcessor( - 'scheduler_runs', + 'scheduler_task_runs', clauses=['task_id=%(task_id)s', 'host_id=%(host_id)s'], values=locals(), data={'state': state}, From 2ea9e4220b13cdbea1a2117997bd5ce89e71f4fa Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 6 Apr 2023 15:46:15 -0400 Subject: [PATCH 33/92] drop some run hooks for now --- kojihub/kojihub.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 531294b8..1e70f991 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -259,16 +259,6 @@ class Task(object): if state == koji.TASK_STATES['OPEN']: update.rawset(start_time='NOW()') update.execute() - if state == koji.TASK_STATES['OPEN']: - # also update run entry - update2 = UpdateProcessor( - 'scheduler_task_runs', - clauses=['task_id=%(task_id)s', 'host_id=%(host_id)s'], - values=locals(), - data={'state': state}, - rawdata={'start_time': 'NOW()'}, - ) - update2.execute() self.runCallbacks('postTaskStateChange', info, 'state', koji.TASK_STATES[newstate]) self.runCallbacks('postTaskStateChange', info, 'host_id', host_id) return True @@ -371,16 +361,6 @@ class Task(object): rawdata={'completion_time': 'NOW()'}) update.execute() - # also update run entry - update2 = UpdateProcessor( - 'scheduler_task_runs', - clauses=['task_id=%(task_id)s', 'host_id=%(host_id)s'], - values=locals(), - data={'state': state}, - rawdata={'end_time': 'NOW()'}, - ) - update2.execute() - self.runCallbacks('postTaskStateChange', info, 'state', state) self.runCallbacks('postTaskStateChange', info, 'completion_ts', now) From 5c29e49129b83e6936a3e69fb40e62878daa3de9 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 6 Apr 2023 16:37:05 -0400 Subject: [PATCH 34/92] ignore session times for now, faster channel query --- kojihub/scheduler.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 92bd9910..9e686178 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -266,7 +266,7 @@ class TaskScheduler(object): ('host.task_load', 'task_load'), ('host_config.arches', 'arches'), ('host_config.capacity', 'capacity'), - ("date_part('epoch', sessions.update_time)", 'update_ts'), + # ("date_part('epoch', sessions.update_time)", 'update_ts'), ) fields, aliases = zip(*fields) @@ -275,30 +275,33 @@ class TaskScheduler(object): columns=fields, aliases=aliases, clauses=[ - 'host.ready IS TRUE', +# 'host.ready IS TRUE', 'host_config.enabled IS TRUE', 'host_config.active IS TRUE', - 'sessions.expired IS FALSE', - 'sessions.master IS NULL', - "sessions.update_time > NOW() - '5 minutes'::interval" +# 'sessions.expired IS FALSE', +# 'sessions.master IS NULL', +# "sessions.update_time > NOW() - '5 minutes'::interval" ], joins=[ - 'sessions USING (user_id)', + # 'sessions USING (user_id)', 'host_config ON host.id = host_config.host_id' ] ) hosts = query.execute() + + # also get channel info + query = QueryProcessor( + tables=['host_channels'], + columns=['host_id', 'channel_id'], + clauses=['active IS TRUE', 'channels.enabled IS TRUE'], + joins=['channels ON host_channels.channel_id = channels.id'], + ) + chan_idx = {} + for row in query.execute(): + chan_idx.setdefault(row['host_id'], []).append(row['channel_id']) for host in hosts: - query = QueryProcessor( - tables=['host_channels'], - columns=['channel_id'], - clauses=['host_id=%(id)s', 'active IS TRUE', 'enabled IS TRUE'], - joins=['channels ON host_channels.channel_id = channels.id'], - values=host - ) - rows = query.execute() - host['channels'] = [row['channel_id'] for row in rows] + host['channels'] = chan_idx.get(host['id'], []) return hosts From ddca9d133b51788c92629a2b97d157d8f8b3610c Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 6 Apr 2023 23:06:08 -0400 Subject: [PATCH 35/92] fixes, better ranks, and more logging --- kojihub/scheduler.py | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 9e686178..df4f578b 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -151,6 +151,7 @@ class TaskScheduler(object): for host in self.hosts.values(): host.setdefault('_load', 0.0) host.setdefault('_ntasks', 0) + host.setdefault('_demand', 0.0) # temporary test code logger.info(f'Host: {host}') ldiff = host['task_load'] - host['_load'] @@ -162,32 +163,47 @@ class TaskScheduler(object): # at the moment this is mostly just bin, but in the future it will be more complex for task in self.free_tasks: task['_hosts'] = [] - min_avail = task['weight'] + self.capacity_overcommit + min_avail = min(0, task['weight'] - self.capacity_overcommit) for host in self.hosts_by_bin.get(task['_bin'], []): - if (host['capacity'] > host['_load'] and - host['_ntasks'] < self.maxjobs and - host['capacity'] - host['_load'] > min_avail): + if (host['capacity'] - host['_load'] > min_avail and + host['_ntasks'] < self.maxjobs): task['_hosts'].append(host) logger.info(f'Task {task["task_id"]}: {len(task["_hosts"])} options') + #import pdb; pdb.set_trace() for host in task['_hosts']: # demand gives us a rough measure of how much overall load is pending for the host host.setdefault('_demand', 0.0) host['_demand'] += task['weight'] / len(task['_hosts']) + # normalize demand to 1 + max_demand = sum([h['_demand'] for h in self.hosts.values()]) + if max_demand > 0.0: + for h in self.hosts.values(): + h['_demand'] = (h['_demand'] / max_demand) + + for h in self.hosts.values(): + self._rank_host(h) + # tasks are already in priority order for task in self.free_tasks: - # pick the host with least demand - task['_hosts'].sort(key=lambda h: h['_demand']) - min_avail = task['weight'] + self.capacity_overcommit + min_avail = task['weight'] - self.capacity_overcommit + task['_hosts'].sort(key=lambda h: h['_rank']) + logger.debug('Task %i choices: %s', task['task_id'], [(h['name'], "%(_rank).2f" %h) for h in task['_hosts']]) for host in task['_hosts']: - if (host['capacity'] > host['_load'] and - host['_ntasks'] < self.maxjobs and - host['capacity'] - host['_load'] > min_avail): + if (host['capacity'] - host['_load'] > min_avail and + host['_ntasks'] < self.maxjobs): # add run entry self.add_run(task, host) - # update our totals + # update our totals and rank host['_load'] += task['weight'] host['_ntasks'] += 1 + self._rank_host(host) + break + else: + logger.debug('Could not assign task %s', task['task_id']) + + def _rank_host(self, host): + host['_rank'] = host['_load'] + host['_ntasks'] + host['_demand'] def get_tasks(self): """Get the task data that we need for scheduling""" @@ -306,6 +322,8 @@ class TaskScheduler(object): return hosts def add_run(self, task, host): + logger.info('Assigning task %s (%s) to host %s', + task['task_id'], task['method'], host['name']) insert = InsertProcessor('scheduler_task_runs') insert.set(task_id=task['task_id'], host_id=host['id'], state=koji.TASK_STATES['ASSIGNED']) insert.execute() From 6737cb5fa5b8415ef138be3d9f3cc3547358f768 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 7 Apr 2023 16:00:56 -0400 Subject: [PATCH 36/92] check on tasks, partially implement assign timeout --- kojihub/kojihub.py | 4 +- kojihub/scheduler.py | 100 +++++++++++++++++++++++++++++++++---------- schemas/schema.sql | 7 +-- 3 files changed, 82 insertions(+), 29 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 1e70f991..3011717d 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -10018,8 +10018,10 @@ def _delete_event_id(): class RootExports(object): '''Contains functions that are made available via XMLRPC''' - def TEST(self): + def TEST(self, fail=False): scheduler.TaskScheduler().run() + if fail: + raise Exception('DEBUG') def TEST2(self, hostID=1): return Host(hostID).getLoadData() diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index df4f578b..ea982291 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -1,5 +1,6 @@ import logging import psycopg2 +import time import koji from .db import QueryProcessor, InsertProcessor, UpdateProcessor, db_lock @@ -52,21 +53,19 @@ def get_tasks_for_host(hostID): return query.execute() -def getTaskRuns(taskID=None, hostID=None, state=None): +def getTaskRuns(taskID=None, hostID=None, active=None): taskID = convert_value(taskID, cast=int, none_allowed=True) hostID = convert_value(hostID, cast=int, none_allowed=True) - state = convert_value(state, cast=intlist, none_allowed=True) + active = convert_value(active, cast=bool, none_allowed=True) fields = ( ('scheduler_task_runs.id', 'id'), ('scheduler_task_runs.task_id', 'task_id'), ('scheduler_task_runs.host_id', 'host_id'), - ('host.name', 'host_name'), - ('task.method', 'method'), - ('scheduler_task_runs.state', 'state'), - ("date_part('epoch', create_time)", 'create_ts'), - ("date_part('epoch', start_time)", 'start_ts'), - ("date_part('epoch', end_time)", 'end_ts'), + # ('host.name', 'host_name'), + # ('task.method', 'method'), + ('scheduler_task_runs.active', 'active'), + ("date_part('epoch', scheduler_task_runs.create_time)", 'create_ts'), ) fields, aliases = zip(*fields) @@ -75,16 +74,15 @@ def getTaskRuns(taskID=None, hostID=None, state=None): clauses.append('task_id = %(taskID)s') if hostID is not None: clauses.append('host_id = %(hostID)s') - if state is not None: - clauses.append('host_id IN %(state)s') + if active is not None: + clauses.append('active = %(active)s') query = QueryProcessor( columns=fields, aliases=aliases, tables=['scheduler_task_runs'], - joins=['host ON host_id=host.id', 'task ON task_id=task.id'], + # joins=['host ON host_id=host.id', 'task ON task_id=task.id'], clauses=clauses, values=locals()) - data = query.execute() - return data + return query.execute() def scheduler_map_task(taskinfo): @@ -104,8 +102,11 @@ class TaskScheduler(object): self.tasks_by_bin = None self.active_tasks = None self.free_tasks = None + + # TODO these things need proper config self.maxjobs = 15 # XXX - self.capacity_overcommit = 5 # TODO config + self.capacity_overcommit = 5 + self.assign_timeout = 300 def run(self): if not db_lock('scheduler', wait=False): @@ -114,22 +115,16 @@ class TaskScheduler(object): self.do_schedule() # TODO clean up bad data (e.g. active tasks with no host) - # TODO check for runs that aren't getting picked up + self.check_active_tasks() return True - def get_runs(self): - runs = getTaskRuns() - runs_by_task = {} - for run in runs: - runs_by_task.setdefault(run['task_id'], []) - runs_by_task[run['task_id']].append(run) - def do_schedule(self): self.get_tasks() self.get_hosts() # debug + logger.info('Running task scheduler') logger.info(f'Hosts: {len(self.hosts)}') logger.info(f'Free tasks: {len(self.free_tasks)}') logger.info(f'Active tasks: {len(self.active_tasks)}') @@ -205,6 +200,52 @@ class TaskScheduler(object): def _rank_host(self, host): host['_rank'] = host['_load'] + host['_ntasks'] + host['_demand'] + def check_active_tasks(self): + """Check on active tasks""" + runs = self.get_active_runs() + logger.info('Found %i active runs', len(runs)) + logger.info('Checking on %i active tasks', len(self.active_tasks)) + for task in self.active_tasks: + if task['state'] == koji.TASK_STATES['ASSIGNED']: + # TODO check time since assigned + # if not taken within a timeout + # - if host not checking in, then make sure host marked unavail and free + # - if host *is* checking in, then treat as refusal and free + taskruns = runs.get(task['task_id'], []) + if not taskruns: + logger.error('No active run for assigned task %(task_id)s', task) + # TODO free + continue + else: + if len(taskruns) > 1: + logger.error('Multiple active run entries for assigned task %(task_id)s', + task) + # TODO fix + age = time.time() - min([r['create_ts'] for r in taskruns]) + if age > self.assign_timeout: + # TODO free + # TODO check host too + logger.info('Task assignment timeout for %(task_id)s', task) + pass + elif task['state'] == koji.TASK_STATES['OPEN']: + # TODO sanity check host + if not task['host_id']: + # shouldn't happen + # TODO + continue + host = self.hosts.get(task['host_id']) + if not host: + logger.error('Host for task is not available') + + def get_active_runs(self): + runs = getTaskRuns(active=True) + runs_by_task = {} + for run in runs: + runs_by_task.setdefault(run['task_id'], []) + runs_by_task[run['task_id']].append(run) + + return runs_by_task + def get_tasks(self): """Get the task data that we need for scheduling""" @@ -324,9 +365,22 @@ class TaskScheduler(object): def add_run(self, task, host): logger.info('Assigning task %s (%s) to host %s', task['task_id'], task['method'], host['name']) + + # mark any older runs inactive + update = UpdateProcessor( + 'scheduler_task_runs', + data={'active': False}, + clauses=['task_id=%(task_id)s', 'active = TRUE'], + values={'task_id': task['task_id']}, + ) + update.execute() + + # add the new run insert = InsertProcessor('scheduler_task_runs') - insert.set(task_id=task['task_id'], host_id=host['id'], state=koji.TASK_STATES['ASSIGNED']) + insert.set(task_id=task['task_id'], host_id=host['id']) insert.execute() + + # mark the task assigned update = UpdateProcessor( 'task', data={'host_id': host['id'], 'state': koji.TASK_STATES['ASSIGNED']}, diff --git a/schemas/schema.sql b/schemas/schema.sql index f97ac2e0..57ff6f5d 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -989,14 +989,11 @@ CREATE TABLE scheduler_task_runs ( id SERIAL NOT NULL PRIMARY KEY, task_id INTEGER REFERENCES task (id) NOT NULL, host_id INTEGER REFERENCES host (id) NOT NULL, - state INTEGER NOT NULL, - create_time TIMESTAMPTZ NOT NULL DEFAULT NOW(), - start_time TIMESTAMPTZ, - end_time TIMESTAMPTZ + active BOOLEAN NOT NULL DEFAULT TRUE, + create_time TIMESTAMPTZ NOT NULL DEFAULT NOW() ) WITHOUT OIDS; CREATE INDEX scheduler_task_runs_task ON scheduler_task_runs(task_id); CREATE INDEX scheduler_task_runs_host ON scheduler_task_runs(host_id); -CREATE INDEX scheduler_task_runs_state ON scheduler_task_runs(state); CREATE INDEX scheduler_task_runs_create_time ON scheduler_task_runs(create_time); From 30629f29fdb6ac80d10b7f3a8223d5453c646cc2 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 7 Apr 2023 16:08:46 -0400 Subject: [PATCH 37/92] move Task class --- kojihub/kojihub.py | 454 -------------------------------------------- kojihub/task.py | 455 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 455 insertions(+), 454 deletions(-) create mode 100644 kojihub/task.py diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 3011717d..080703b0 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -112,460 +112,6 @@ def xform_user_krb(entry): return entry -class Task(object): - """A task for the build hosts""" - - fields = ( - ('task.id', 'id'), - ('task.state', 'state'), - ('task.create_time', 'create_time'), - ("date_part('epoch', create_time)", 'create_ts'), - ('task.start_time', 'start_time'), - ("date_part('epoch', task.start_time)", 'start_ts'), - ('task.completion_time', 'completion_time'), - ("date_part('epoch', completion_time)", 'completion_ts'), - ('task.channel_id', 'channel_id'), - ('task.host_id', 'host_id'), - ('task.parent', 'parent'), - ('task.label', 'label'), - ('task.waiting', 'waiting'), - ('task.awaited', 'awaited'), - ('task.owner', 'owner'), - ('task.method', 'method'), - ('task.arch', 'arch'), - ('task.priority', 'priority'), - ('task.weight', 'weight')) - - def __init__(self, id): - self.id = convert_value(id, cast=int) - self.logger = logging.getLogger("koji.hub.Task") - - def _split_fields(self, fields=None): - """Helper function for split fields to QueryProcessor's - columns/aliases options""" - if fields is None: - fields = self.fields - columns = [f[0] for f in fields] - aliases = [f[1] for f in fields] - return columns, aliases - - def verifyHost(self, host_id=None): - """Verify that host owns task""" - if host_id is None: - host_id = context.session.host_id - if host_id is None: - return False - task_id = self.id - # getting a row lock on this task to ensure task assignment sanity - # no other concurrent transaction should be altering this row - query = QueryProcessor(tables=['task'], columns=['state', 'host_id'], - clauses=['id=%(task_id)s'], values={'task_id': task_id}, - opts={'rowlock': True}) - r = query.executeOne() - if not r: - raise koji.GenericError("No such task: %i" % task_id) - return (r['state'] == koji.TASK_STATES['OPEN'] and r['host_id'] == host_id) - - def assertHost(self, host_id): - if not self.verifyHost(host_id): - raise koji.ActionNotAllowed("host %d does not own task %d" % (host_id, self.id)) - - def getOwner(self): - """Return the owner (user_id) for this task""" - query = QueryProcessor(tables=['task'], columns=['owner'], - clauses=['id=%(id)i'], values=vars(self)) - return query.singleValue() - - def verifyOwner(self, user_id=None): - """Verify that user owns task""" - if user_id is None: - user_id = context.session.user_id - if user_id is None: - return False - task_id = self.id - # getting a row lock on this task to ensure task state sanity - query = QueryProcessor(tables=['task'], columns=['owner'], - clauses=['id=%(task_id)s'], values={'task_id': task_id}, - opts={'rowlock': True}) - owner = query.singleValue(strict=False) - if not owner: - raise koji.GenericError("No such task: %i" % task_id) - return (owner == user_id) - - def assertOwner(self, user_id=None): - if not self.verifyOwner(user_id): - raise koji.ActionNotAllowed("user %d does not own task %d" % (user_id, self.id)) - - def lock(self, host_id, newstate='OPEN', force=False): - """Attempt to associate the task for host, either to assign or open - - returns True if successful, False otherwise""" - info = self.getInfo(request=True) - self.runCallbacks('preTaskStateChange', info, 'state', koji.TASK_STATES[newstate]) - self.runCallbacks('preTaskStateChange', info, 'host_id', host_id) - # we use row-level locks to keep things sane - # note the QueryProcessor...opts={'rowlock': True} - task_id = self.id - if not force: - query = QueryProcessor(columns=['state', 'host_id'], tables=['task'], - clauses=['id=%(task_id)s'], values={'task_id': task_id}, - opts={'rowlock': True}) - r = query.executeOne() - if not r: - raise koji.GenericError("No such task: %i" % task_id) - state = r['state'] - otherhost = r['host_id'] - if state == koji.TASK_STATES['FREE']: - if otherhost is not None: - log_error(f"Error: task {task_id} is both free " - f"and handled by host {otherhost}") - return False - elif state == koji.TASK_STATES['ASSIGNED']: - if otherhost is None: - log_error(f"Error: task {task_id} is assigned, but no host is really assigned") - return False - elif otherhost != host_id: - # task is assigned to someone else, no error just return - return False - elif newstate == 'ASSIGNED': - # double assign is a weird situation but we can return True as state doesn't - # really change - log_error(f"Error: double assign of task {task_id} and host {host_id}") - return True - # otherwise the task is assigned to host_id, so keep going - elif state == koji.TASK_STATES['CANCELED']: - # it is ok that task was canceled meanwhile - return False - elif state == koji.TASK_STATES['OPEN']: - if otherhost is None: - log_error(f"Error: task {task_id} is opened but not handled by any host") - elif otherhost == host_id: - log_error(f"Error: task {task_id} is already open and handled by " - f"{host_id} (double open/assign)") - return False - else: - # state is CLOSED or FAILED - if otherhost is None: - log_error(f"Error: task {task_id} is non-free but not handled by any host " - f"(state {koji.TASK_STATES[state]})") - return False - # if we reach here, task is either - # - free and unlocked - # - assigned to host_id - # - force option is enabled - state = koji.TASK_STATES[newstate] - update = UpdateProcessor('task', clauses=['id=%(task_id)i'], values=locals()) - update.set(state=state, host_id=host_id) - if state == koji.TASK_STATES['OPEN']: - update.rawset(start_time='NOW()') - update.execute() - self.runCallbacks('postTaskStateChange', info, 'state', koji.TASK_STATES[newstate]) - self.runCallbacks('postTaskStateChange', info, 'host_id', host_id) - return True - - def assign(self, host_id, force=False): - """Attempt to assign the task to host. - - returns True if successful, False otherwise""" - return self.lock(host_id, 'ASSIGNED', force) - - def open(self, host_id): - """Attempt to open the task for host. - - returns task data if successful, None otherwise""" - if self.lock(host_id, 'OPEN'): - # get more complete data to return - fields = self.fields + (('task.request', 'request'),) - query = QueryProcessor(tables=['task'], clauses=['id=%(id)i'], values=vars(self), - columns=[f[0] for f in fields], aliases=[f[1] for f in fields]) - ret = query.executeOne() - if ret['request'].find(' Date: Fri, 7 Apr 2023 16:28:33 -0400 Subject: [PATCH 38/92] ... --- kojihub/kojihub.py | 1 + kojihub/task.py | 32 +++++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 080703b0..98ce58a5 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -93,6 +93,7 @@ from .db import ( # noqa: F401 nextval, currval, ) +from .task import Task from .util import convert_value diff --git a/kojihub/task.py b/kojihub/task.py index e515b381..db0dd39a 100644 --- a/kojihub/task.py +++ b/kojihub/task.py @@ -1,4 +1,17 @@ # Task related hub code +import base64 +import logging +import time +import xmlrpc.client + +import koji +from .db import QueryProcessor, UpdateProcessor +from .util import convert_value +from koji.context import context +from koji.util import decode_bytes + + +logger = logging.getLogger('koji.hub.task') class Task(object): @@ -106,12 +119,13 @@ class Task(object): otherhost = r['host_id'] if state == koji.TASK_STATES['FREE']: if otherhost is not None: - log_error(f"Error: task {task_id} is both free " - f"and handled by host {otherhost}") + logger.error(f"Error: task {task_id} is both free " + f"and handled by host {otherhost}") return False elif state == koji.TASK_STATES['ASSIGNED']: if otherhost is None: - log_error(f"Error: task {task_id} is assigned, but no host is really assigned") + logger.error(f"Error: task {task_id} is assigned, but no host is really " + "assigned") return False elif otherhost != host_id: # task is assigned to someone else, no error just return @@ -119,7 +133,7 @@ class Task(object): elif newstate == 'ASSIGNED': # double assign is a weird situation but we can return True as state doesn't # really change - log_error(f"Error: double assign of task {task_id} and host {host_id}") + logger.error(f"Error: double assign of task {task_id} and host {host_id}") return True # otherwise the task is assigned to host_id, so keep going elif state == koji.TASK_STATES['CANCELED']: @@ -127,16 +141,16 @@ class Task(object): return False elif state == koji.TASK_STATES['OPEN']: if otherhost is None: - log_error(f"Error: task {task_id} is opened but not handled by any host") + logger.error(f"Error: task {task_id} is opened but not handled by any host") elif otherhost == host_id: - log_error(f"Error: task {task_id} is already open and handled by " - f"{host_id} (double open/assign)") + logger.error(f"Error: task {task_id} is already open and handled by " + f"{host_id} (double open/assign)") return False else: # state is CLOSED or FAILED if otherhost is None: - log_error(f"Error: task {task_id} is non-free but not handled by any host " - f"(state {koji.TASK_STATES[state]})") + logger.error(f"Error: task {task_id} is non-free but not handled by any host " + f"(state {koji.TASK_STATES[state]})") return False # if we reach here, task is either # - free and unlocked From 50186a6c58f924912b0eae54c3e50a66ad6817bb Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 7 Apr 2023 16:46:19 -0400 Subject: [PATCH 39/92] Revert "..." This reverts commit 8fc33e56070c09b94b5bd689e3871e537c2a8227. --- kojihub/kojihub.py | 1 - kojihub/task.py | 32 +++++++++----------------------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 98ce58a5..080703b0 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -93,7 +93,6 @@ from .db import ( # noqa: F401 nextval, currval, ) -from .task import Task from .util import convert_value diff --git a/kojihub/task.py b/kojihub/task.py index db0dd39a..e515b381 100644 --- a/kojihub/task.py +++ b/kojihub/task.py @@ -1,17 +1,4 @@ # Task related hub code -import base64 -import logging -import time -import xmlrpc.client - -import koji -from .db import QueryProcessor, UpdateProcessor -from .util import convert_value -from koji.context import context -from koji.util import decode_bytes - - -logger = logging.getLogger('koji.hub.task') class Task(object): @@ -119,13 +106,12 @@ class Task(object): otherhost = r['host_id'] if state == koji.TASK_STATES['FREE']: if otherhost is not None: - logger.error(f"Error: task {task_id} is both free " - f"and handled by host {otherhost}") + log_error(f"Error: task {task_id} is both free " + f"and handled by host {otherhost}") return False elif state == koji.TASK_STATES['ASSIGNED']: if otherhost is None: - logger.error(f"Error: task {task_id} is assigned, but no host is really " - "assigned") + log_error(f"Error: task {task_id} is assigned, but no host is really assigned") return False elif otherhost != host_id: # task is assigned to someone else, no error just return @@ -133,7 +119,7 @@ class Task(object): elif newstate == 'ASSIGNED': # double assign is a weird situation but we can return True as state doesn't # really change - logger.error(f"Error: double assign of task {task_id} and host {host_id}") + log_error(f"Error: double assign of task {task_id} and host {host_id}") return True # otherwise the task is assigned to host_id, so keep going elif state == koji.TASK_STATES['CANCELED']: @@ -141,16 +127,16 @@ class Task(object): return False elif state == koji.TASK_STATES['OPEN']: if otherhost is None: - logger.error(f"Error: task {task_id} is opened but not handled by any host") + log_error(f"Error: task {task_id} is opened but not handled by any host") elif otherhost == host_id: - logger.error(f"Error: task {task_id} is already open and handled by " - f"{host_id} (double open/assign)") + log_error(f"Error: task {task_id} is already open and handled by " + f"{host_id} (double open/assign)") return False else: # state is CLOSED or FAILED if otherhost is None: - logger.error(f"Error: task {task_id} is non-free but not handled by any host " - f"(state {koji.TASK_STATES[state]})") + log_error(f"Error: task {task_id} is non-free but not handled by any host " + f"(state {koji.TASK_STATES[state]})") return False # if we reach here, task is either # - free and unlocked From 8e642152b1dd88f3260aa2e8275ae942cd2646f7 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 7 Apr 2023 16:46:20 -0400 Subject: [PATCH 40/92] Revert "move Task class" This reverts commit e385df81fb90b4cb15928ff64fac239ce90ca4fd. --- kojihub/kojihub.py | 454 ++++++++++++++++++++++++++++++++++++++++++++ kojihub/task.py | 455 --------------------------------------------- 2 files changed, 454 insertions(+), 455 deletions(-) delete mode 100644 kojihub/task.py diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 080703b0..3011717d 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -112,6 +112,460 @@ def xform_user_krb(entry): return entry +class Task(object): + """A task for the build hosts""" + + fields = ( + ('task.id', 'id'), + ('task.state', 'state'), + ('task.create_time', 'create_time'), + ("date_part('epoch', create_time)", 'create_ts'), + ('task.start_time', 'start_time'), + ("date_part('epoch', task.start_time)", 'start_ts'), + ('task.completion_time', 'completion_time'), + ("date_part('epoch', completion_time)", 'completion_ts'), + ('task.channel_id', 'channel_id'), + ('task.host_id', 'host_id'), + ('task.parent', 'parent'), + ('task.label', 'label'), + ('task.waiting', 'waiting'), + ('task.awaited', 'awaited'), + ('task.owner', 'owner'), + ('task.method', 'method'), + ('task.arch', 'arch'), + ('task.priority', 'priority'), + ('task.weight', 'weight')) + + def __init__(self, id): + self.id = convert_value(id, cast=int) + self.logger = logging.getLogger("koji.hub.Task") + + def _split_fields(self, fields=None): + """Helper function for split fields to QueryProcessor's + columns/aliases options""" + if fields is None: + fields = self.fields + columns = [f[0] for f in fields] + aliases = [f[1] for f in fields] + return columns, aliases + + def verifyHost(self, host_id=None): + """Verify that host owns task""" + if host_id is None: + host_id = context.session.host_id + if host_id is None: + return False + task_id = self.id + # getting a row lock on this task to ensure task assignment sanity + # no other concurrent transaction should be altering this row + query = QueryProcessor(tables=['task'], columns=['state', 'host_id'], + clauses=['id=%(task_id)s'], values={'task_id': task_id}, + opts={'rowlock': True}) + r = query.executeOne() + if not r: + raise koji.GenericError("No such task: %i" % task_id) + return (r['state'] == koji.TASK_STATES['OPEN'] and r['host_id'] == host_id) + + def assertHost(self, host_id): + if not self.verifyHost(host_id): + raise koji.ActionNotAllowed("host %d does not own task %d" % (host_id, self.id)) + + def getOwner(self): + """Return the owner (user_id) for this task""" + query = QueryProcessor(tables=['task'], columns=['owner'], + clauses=['id=%(id)i'], values=vars(self)) + return query.singleValue() + + def verifyOwner(self, user_id=None): + """Verify that user owns task""" + if user_id is None: + user_id = context.session.user_id + if user_id is None: + return False + task_id = self.id + # getting a row lock on this task to ensure task state sanity + query = QueryProcessor(tables=['task'], columns=['owner'], + clauses=['id=%(task_id)s'], values={'task_id': task_id}, + opts={'rowlock': True}) + owner = query.singleValue(strict=False) + if not owner: + raise koji.GenericError("No such task: %i" % task_id) + return (owner == user_id) + + def assertOwner(self, user_id=None): + if not self.verifyOwner(user_id): + raise koji.ActionNotAllowed("user %d does not own task %d" % (user_id, self.id)) + + def lock(self, host_id, newstate='OPEN', force=False): + """Attempt to associate the task for host, either to assign or open + + returns True if successful, False otherwise""" + info = self.getInfo(request=True) + self.runCallbacks('preTaskStateChange', info, 'state', koji.TASK_STATES[newstate]) + self.runCallbacks('preTaskStateChange', info, 'host_id', host_id) + # we use row-level locks to keep things sane + # note the QueryProcessor...opts={'rowlock': True} + task_id = self.id + if not force: + query = QueryProcessor(columns=['state', 'host_id'], tables=['task'], + clauses=['id=%(task_id)s'], values={'task_id': task_id}, + opts={'rowlock': True}) + r = query.executeOne() + if not r: + raise koji.GenericError("No such task: %i" % task_id) + state = r['state'] + otherhost = r['host_id'] + if state == koji.TASK_STATES['FREE']: + if otherhost is not None: + log_error(f"Error: task {task_id} is both free " + f"and handled by host {otherhost}") + return False + elif state == koji.TASK_STATES['ASSIGNED']: + if otherhost is None: + log_error(f"Error: task {task_id} is assigned, but no host is really assigned") + return False + elif otherhost != host_id: + # task is assigned to someone else, no error just return + return False + elif newstate == 'ASSIGNED': + # double assign is a weird situation but we can return True as state doesn't + # really change + log_error(f"Error: double assign of task {task_id} and host {host_id}") + return True + # otherwise the task is assigned to host_id, so keep going + elif state == koji.TASK_STATES['CANCELED']: + # it is ok that task was canceled meanwhile + return False + elif state == koji.TASK_STATES['OPEN']: + if otherhost is None: + log_error(f"Error: task {task_id} is opened but not handled by any host") + elif otherhost == host_id: + log_error(f"Error: task {task_id} is already open and handled by " + f"{host_id} (double open/assign)") + return False + else: + # state is CLOSED or FAILED + if otherhost is None: + log_error(f"Error: task {task_id} is non-free but not handled by any host " + f"(state {koji.TASK_STATES[state]})") + return False + # if we reach here, task is either + # - free and unlocked + # - assigned to host_id + # - force option is enabled + state = koji.TASK_STATES[newstate] + update = UpdateProcessor('task', clauses=['id=%(task_id)i'], values=locals()) + update.set(state=state, host_id=host_id) + if state == koji.TASK_STATES['OPEN']: + update.rawset(start_time='NOW()') + update.execute() + self.runCallbacks('postTaskStateChange', info, 'state', koji.TASK_STATES[newstate]) + self.runCallbacks('postTaskStateChange', info, 'host_id', host_id) + return True + + def assign(self, host_id, force=False): + """Attempt to assign the task to host. + + returns True if successful, False otherwise""" + return self.lock(host_id, 'ASSIGNED', force) + + def open(self, host_id): + """Attempt to open the task for host. + + returns task data if successful, None otherwise""" + if self.lock(host_id, 'OPEN'): + # get more complete data to return + fields = self.fields + (('task.request', 'request'),) + query = QueryProcessor(tables=['task'], clauses=['id=%(id)i'], values=vars(self), + columns=[f[0] for f in fields], aliases=[f[1] for f in fields]) + ret = query.executeOne() + if ret['request'].find(' Date: Fri, 7 Apr 2023 17:02:35 -0400 Subject: [PATCH 41/92] do some task freeing --- kojihub/scheduler.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index ea982291..e427209d 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -3,6 +3,7 @@ import psycopg2 import time import koji +from . import kojihub from .db import QueryProcessor, InsertProcessor, UpdateProcessor, db_lock from .util import convert_value from koji.context import context @@ -62,8 +63,8 @@ def getTaskRuns(taskID=None, hostID=None, active=None): ('scheduler_task_runs.id', 'id'), ('scheduler_task_runs.task_id', 'task_id'), ('scheduler_task_runs.host_id', 'host_id'), - # ('host.name', 'host_name'), - # ('task.method', 'method'), + # ('host.name', 'host_name'), + # ('task.method', 'method'), ('scheduler_task_runs.active', 'active'), ("date_part('epoch', scheduler_task_runs.create_time)", 'create_ts'), ) @@ -214,7 +215,7 @@ class TaskScheduler(object): taskruns = runs.get(task['task_id'], []) if not taskruns: logger.error('No active run for assigned task %(task_id)s', task) - # TODO free + kojihub.Task(task['task_id']).free() continue else: if len(taskruns) > 1: @@ -223,19 +224,21 @@ class TaskScheduler(object): # TODO fix age = time.time() - min([r['create_ts'] for r in taskruns]) if age > self.assign_timeout: - # TODO free # TODO check host too logger.info('Task assignment timeout for %(task_id)s', task) + kojihub.Task(task['task_id']).free() pass elif task['state'] == koji.TASK_STATES['OPEN']: # TODO sanity check host if not task['host_id']: # shouldn't happen - # TODO + logger.error('Open task with no host %(task_id)s', task) + kojihub.Task(task['task_id']).free() continue host = self.hosts.get(task['host_id']) if not host: logger.error('Host for task is not available') + # TODO def get_active_runs(self): runs = getTaskRuns(active=True) From a9a23900e3f890570bd2075755ded69e1368c853 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 7 Apr 2023 17:12:08 -0400 Subject: [PATCH 42/92] export getTaskRuns --- kojihub/scheduler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index e427209d..6229c627 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -17,10 +17,6 @@ class DBLogger: pass -class SchedulerExports: - pass - - def intlist(value): """Cast value to a list of ints""" if isinstance(value, (list, tuple)): @@ -54,7 +50,7 @@ def get_tasks_for_host(hostID): return query.execute() -def getTaskRuns(taskID=None, hostID=None, active=None): +def get_task_runs(taskID=None, hostID=None, active=None): taskID = convert_value(taskID, cast=int, none_allowed=True) hostID = convert_value(hostID, cast=int, none_allowed=True) active = convert_value(active, cast=bool, none_allowed=True) @@ -241,7 +237,7 @@ class TaskScheduler(object): # TODO def get_active_runs(self): - runs = getTaskRuns(active=True) + runs = get_task_runs(active=True) runs_by_task = {} for run in runs: runs_by_task.setdefault(run['task_id'], []) @@ -391,3 +387,7 @@ class TaskScheduler(object): values={'task_id': task['task_id'], 'free': koji.TASK_STATES['FREE']}, ) update.execute() + + +class SchedulerExports: + getTaskRuns = staticmethod(get_task_runs) From 83f26189b675d3781a393aae2c90387ab6f6e945 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 7 Apr 2023 18:03:09 -0400 Subject: [PATCH 43/92] end stale runs --- kojihub/scheduler.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 6229c627..97923a66 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -236,6 +236,17 @@ class TaskScheduler(object): logger.error('Host for task is not available') # TODO + # end stale runs + update = UpdateProcessor( + 'scheduler_task_runs', + data={'active': False}, + clauses=['active = TRUE', + '(SELECT id FROM task WHERE task.id=task_id AND state IN %(states)s) IS NULL'], + values={'states': [koji.TASK_STATES[s] for s in ('OPEN', 'ASSIGNED')]}, + ) + update.execute() + + def get_active_runs(self): runs = get_task_runs(active=True) runs_by_task = {} From f07c38155d519a9fec9e19fe59ad330edce62c52 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 7 Apr 2023 21:19:49 -0400 Subject: [PATCH 44/92] implement host timeout in scheduler --- kojihub/scheduler.py | 67 +++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 97923a66..81cb88e5 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -104,6 +104,7 @@ class TaskScheduler(object): self.maxjobs = 15 # XXX self.capacity_overcommit = 5 self.assign_timeout = 300 + self.host_timeout = 900 def run(self): if not db_lock('scheduler', wait=False): @@ -203,38 +204,50 @@ class TaskScheduler(object): logger.info('Found %i active runs', len(runs)) logger.info('Checking on %i active tasks', len(self.active_tasks)) for task in self.active_tasks: + + if not task['host_id']: + logger.error('Active task with no host: %s', task['task_id']) + kojihub.Task(task['task_id']).free() + continue + + host = self.hosts.get(task['host_id']) + if not host: + # host disabled? + # TODO + continue + + taskruns = runs.get(task['task_id'], []) + if not taskruns: + logger.error('No active run for assigned task %(task_id)s', task) + kojihub.Task(task['task_id']).free() + continue + + if len(taskruns) > 1: + logger.error('Multiple active run entries for assigned task %(task_id)s', + task) + # TODO fix + if task['state'] == koji.TASK_STATES['ASSIGNED']: # TODO check time since assigned # if not taken within a timeout # - if host not checking in, then make sure host marked unavail and free # - if host *is* checking in, then treat as refusal and free - taskruns = runs.get(task['task_id'], []) - if not taskruns: - logger.error('No active run for assigned task %(task_id)s', task) + age = time.time() - min([r['create_ts'] for r in taskruns]) + if age > self.assign_timeout: + logger.info('Task assignment timeout for %(task_id)s', task) kojihub.Task(task['task_id']).free() - continue - else: - if len(taskruns) > 1: - logger.error('Multiple active run entries for assigned task %(task_id)s', - task) - # TODO fix - age = time.time() - min([r['create_ts'] for r in taskruns]) - if age > self.assign_timeout: - # TODO check host too - logger.info('Task assignment timeout for %(task_id)s', task) - kojihub.Task(task['task_id']).free() - pass + elif task['state'] == koji.TASK_STATES['OPEN']: - # TODO sanity check host - if not task['host_id']: - # shouldn't happen - logger.error('Open task with no host %(task_id)s', task) + if host['update_ts'] is None: + # shouldn't happen? + # fall back to task_run time + age = time.time() - min([r['create_ts'] for r in taskruns]) + else: + age = time.time() - host['update_ts'] + if age > self.host_timeout: + logger.info('Freeing task %s from unresponsive host %s', + task['task_id'], host['name']) kojihub.Task(task['task_id']).free() - continue - host = self.hosts.get(task['host_id']) - if not host: - logger.error('Host for task is not available') - # TODO # end stale runs update = UpdateProcessor( @@ -330,10 +343,10 @@ class TaskScheduler(object): fields = ( ('host.id', 'id'), ('host.name', 'name'), + ("date_part('epoch', host.update_time)", 'update_ts'), ('host.task_load', 'task_load'), ('host_config.arches', 'arches'), ('host_config.capacity', 'capacity'), - # ("date_part('epoch', sessions.update_time)", 'update_ts'), ) fields, aliases = zip(*fields) @@ -345,12 +358,8 @@ class TaskScheduler(object): # 'host.ready IS TRUE', 'host_config.enabled IS TRUE', 'host_config.active IS TRUE', -# 'sessions.expired IS FALSE', -# 'sessions.master IS NULL', -# "sessions.update_time > NOW() - '5 minutes'::interval" ], joins=[ - # 'sessions USING (user_id)', 'host_config ON host.id = host_config.host_id' ] ) From 7213f6dd910dc89c9154f6eeb0a23a76f445d04b Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Mon, 10 Apr 2023 16:53:09 -0400 Subject: [PATCH 45/92] fix stale ready states --- kojihub/scheduler.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 81cb88e5..95f47f59 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -103,6 +103,7 @@ class TaskScheduler(object): # TODO these things need proper config self.maxjobs = 15 # XXX self.capacity_overcommit = 5 + self.ready_timeout = 180 self.assign_timeout = 300 self.host_timeout = 900 @@ -112,8 +113,8 @@ class TaskScheduler(object): return False self.do_schedule() - # TODO clean up bad data (e.g. active tasks with no host) self.check_active_tasks() + self.check_hosts() return True @@ -260,6 +261,25 @@ class TaskScheduler(object): update.execute() + def check_hosts(self): + # sanity check ready status + hosts_to_mark = [] + for host in self.hosts.values(): + if not host['ready']: + continue + if (host['update_ts'] is None or time.time() - host['update_ts'] > self.ready_timeout): + hosts_to_mark.append(host) + + if hosts_to_mark: + update = db.UpdateProcessor( + 'host', + data={'ready': False}, + clauses=['host_id IN %(host_ids)s'], + values={'host_ids': [h['id'] for h in hosts_to_mark]}, + ) + update.execute() + + def get_active_runs(self): runs = get_task_runs(active=True) runs_by_task = {} @@ -325,7 +345,7 @@ class TaskScheduler(object): # get hosts and bin them hosts_by_bin = {} hosts_by_id = {} - for host in self.get_ready_hosts(): + for host in self._get_hosts(): host['_bins'] = [] hosts_by_id[host['id']] = host for chan in host['channels']: @@ -337,14 +357,15 @@ class TaskScheduler(object): self.hosts_by_bin = hosts_by_bin self.hosts = hosts_by_id - def get_ready_hosts(self): - """Query hosts that are ready to build""" + def _get_hosts(self): + """Query enabled hosts""" fields = ( ('host.id', 'id'), ('host.name', 'name'), ("date_part('epoch', host.update_time)", 'update_ts'), ('host.task_load', 'task_load'), + ('host.ready', 'ready'), ('host_config.arches', 'arches'), ('host_config.capacity', 'capacity'), ) @@ -355,7 +376,6 @@ class TaskScheduler(object): columns=fields, aliases=aliases, clauses=[ -# 'host.ready IS TRUE', 'host_config.enabled IS TRUE', 'host_config.active IS TRUE', ], From f54e4f6acb5ce80abda6d75e48f671b24e2d6847 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Tue, 11 Apr 2023 15:58:42 -0400 Subject: [PATCH 46/92] simple db logging --- kojihub/kojihub.py | 8 -------- kojihub/scheduler.py | 32 ++++++++++++++++++++++---------- schemas/schema.sql | 3 --- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 3011717d..22c60f8d 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -97,7 +97,6 @@ from .util import convert_value logger = logging.getLogger('koji.hub') -sched_logger = scheduler.DBLogger() NUMERIC_TYPES = (int, float) @@ -14355,8 +14354,6 @@ class HostExports(object): insert = InsertProcessor(table=table, data={'data': hostdata}, clauses=clauses, values=values) insert.execute() - sched_logger.debug(f"Updating host data with: {hostdata}", - host_id=host.id, location='setHostData') def getTasks(self): host = Host() @@ -14377,9 +14374,6 @@ class HostExports(object): } ) tasks = query.execute() - for task in tasks: - sched_logger.debug("Sending task", host_id=host.id, task_id=task['id'], - location="getTasks") return tasks def refuseTask(self, task_id): @@ -14390,8 +14384,6 @@ class HostExports(object): task = Task(task_id) task.free(newstate=koji.TASK_STATES['REFUSED']) - sched_logger.warning("Refusing task", host_id=host.id, task_id=task_id, - location="refuseTask") return True def getHostTasks(self): diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 95f47f59..881d49b0 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -10,11 +10,21 @@ from koji.context import context logger = logging.getLogger('koji.scheduler') -# TODO set up db logging -class DBLogger: - pass +def log_db(msg, task_id=None, host_id=None): + insert = InsertProcessor( + 'scheduler_log_messages', + data={'msg': msg, 'task_id': task_id, 'host_id': host_id}, + ) + insert.execute() + + +def log_both(msg, task_id=None, host_id=None, level=logging.INFO): + pre1 = f"[task_id={task_id}] " if task_id else "" + pre2 = f"[host_id={host_id}] " if host_id else "" + logger.log(level, '%s%s%s', pre1, pre2, msg) + log_db(msg, task_id, host_id) def intlist(value): @@ -207,7 +217,7 @@ class TaskScheduler(object): for task in self.active_tasks: if not task['host_id']: - logger.error('Active task with no host: %s', task['task_id']) + log_both('Active task with no host', task_id=task['task_id'], level=logging.ERROR) kojihub.Task(task['task_id']).free() continue @@ -219,7 +229,8 @@ class TaskScheduler(object): taskruns = runs.get(task['task_id'], []) if not taskruns: - logger.error('No active run for assigned task %(task_id)s', task) + log_both('Assigned task with no active run entry', task_id=task['task_id'], + host_id=host['id'], level=logging.ERROR) kojihub.Task(task['task_id']).free() continue @@ -235,7 +246,8 @@ class TaskScheduler(object): # - if host *is* checking in, then treat as refusal and free age = time.time() - min([r['create_ts'] for r in taskruns]) if age > self.assign_timeout: - logger.info('Task assignment timeout for %(task_id)s', task) + log_both('Task assignment timeout', task_id=task['task_id'], + host_id=host['id']) kojihub.Task(task['task_id']).free() elif task['state'] == koji.TASK_STATES['OPEN']: @@ -246,8 +258,8 @@ class TaskScheduler(object): else: age = time.time() - host['update_ts'] if age > self.host_timeout: - logger.info('Freeing task %s from unresponsive host %s', - task['task_id'], host['name']) + log_both('Freeing task from unresponsive host', task_id=task['task_id'], + host_id=host['id']) kojihub.Task(task['task_id']).free() # end stale runs @@ -269,6 +281,7 @@ class TaskScheduler(object): continue if (host['update_ts'] is None or time.time() - host['update_ts'] > self.ready_timeout): hosts_to_mark.append(host) + log_both('Marking host not ready', host_id=host['id']) if hosts_to_mark: update = db.UpdateProcessor( @@ -402,8 +415,7 @@ class TaskScheduler(object): return hosts def add_run(self, task, host): - logger.info('Assigning task %s (%s) to host %s', - task['task_id'], task['method'], host['name']) + log_both('Assigning task', task_id=task['task_id'], host_id=host['id']) # mark any older runs inactive update = UpdateProcessor( diff --git a/schemas/schema.sql b/schemas/schema.sql index 57ff6f5d..9aae6bf7 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1026,9 +1026,6 @@ CREATE TABLE scheduler_log_messages ( task_id INTEGER REFERENCES task (id), host_id INTEGER REFERENCES host (id), msg_time TIMESTAMPTZ NOT NULL DEFAULT NOW(), - logger_name VARCHAR(200) NOT NULL, - level VARCHAR(10) NOT NULL, - location VARCHAR(200), msg TEXT NOT NULL ) WITHOUT OIDS; From 74fd7617509853f9030e6d0cb6ef3bee6bf435e2 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Tue, 11 Apr 2023 22:51:25 -0400 Subject: [PATCH 47/92] fixes --- kojihub/scheduler.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 881d49b0..07c3bc50 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -122,16 +122,15 @@ class TaskScheduler(object): # already running elsewhere return False + self.get_tasks() + self.get_hosts() + self.check_hosts() self.do_schedule() self.check_active_tasks() - self.check_hosts() return True def do_schedule(self): - self.get_tasks() - self.get_hosts() - # debug logger.info('Running task scheduler') logger.info(f'Hosts: {len(self.hosts)}') @@ -169,8 +168,8 @@ class TaskScheduler(object): task['_hosts'] = [] min_avail = min(0, task['weight'] - self.capacity_overcommit) for host in self.hosts_by_bin.get(task['_bin'], []): - if (host['capacity'] - host['_load'] > min_avail and - host['_ntasks'] < self.maxjobs): + if (host['ready'] and host['_ntasks'] < self.maxjobs and + host['capacity'] - host['_load'] > min_avail): task['_hosts'].append(host) logger.info(f'Task {task["task_id"]}: {len(task["_hosts"])} options') #import pdb; pdb.set_trace() From 1f54dbdf8f8b8569539da6e2be256785f0b2135a Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Tue, 11 Apr 2023 23:02:01 -0400 Subject: [PATCH 48/92] watch-tasks: show host for assigned tasks --- cli/koji_cli/lib.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cli/koji_cli/lib.py b/cli/koji_cli/lib.py index 2ec1a1fd..c605940f 100644 --- a/cli/koji_cli/lib.py +++ b/cli/koji_cli/lib.py @@ -261,12 +261,13 @@ class TaskWatcher(object): # not finished either. info would be none. if not info: return 'unknown' - if info['state'] == koji.TASK_STATES['OPEN']: + if koji.TASK_STATES[info['state']] in ['OPEN', 'ASSIGNED']: + state = koji.TASK_STATES[info['state']].lower() if info['host_id']: host = self.session.getHost(info['host_id']) - return 'open (%s)' % host['name'] + return '%s (%s)' % (state, host['name']) else: - return 'open' + return state elif info['state'] == koji.TASK_STATES['FAILED']: s = 'FAILED: %s' % self.get_failure() From 4b5596b9bd85a20cb643de0b669bc855a7be60d8 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 20 Apr 2023 18:20:33 -0400 Subject: [PATCH 49/92] Revert "move convert_value to util lib" This reverts commit 61f5d2bd46623a9fd9c74ae67f81a7d43da7821f. --- kojihub/kojihub.py | 31 ++++++++++++++++++++++++++++++- kojihub/scheduler.py | 4 +++- kojihub/util.py | 33 --------------------------------- 3 files changed, 33 insertions(+), 35 deletions(-) delete mode 100644 kojihub/util.py diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 22c60f8d..a8317bf1 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -93,7 +93,6 @@ from .db import ( # noqa: F401 nextval, currval, ) -from .util import convert_value logger = logging.getLogger('koji.hub') @@ -111,6 +110,36 @@ def xform_user_krb(entry): return entry +def convert_value(value, cast=None, message=None, + exc_type=koji.ParameterError, none_allowed=False, check_only=False): + """Cast to another type with tailored exception + + :param any value: tested object + :param type cast: To which type value should be cast + :param type exc_type: Raise this exception + :param bool none_allowed: Is None valid value? + :param check_only: Don't convert but raise an exception if type(value) != cast + + :returns any value: returns converted value + """ + if value is None: + if not none_allowed: + raise exc_type(message or f"Invalid type, expected type {cast}") + else: + return value + if check_only: + if not isinstance(value, cast): + raise exc_type(message or f"Invalid type for value '{value}': {type(value)}, " + f"expected type {cast}") + else: + try: + value = cast(value) + except (ValueError, TypeError): + raise exc_type(message or f"Invalid type for value '{value}': {type(value)}, " + f"expected type {cast}") + return value + + class Task(object): """A task for the build hosts""" diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 07c3bc50..768e27c2 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -5,10 +5,12 @@ import time import koji from . import kojihub from .db import QueryProcessor, InsertProcessor, UpdateProcessor, db_lock -from .util import convert_value from koji.context import context +convert_value = kojihub.convert_value + + logger = logging.getLogger('koji.scheduler') diff --git a/kojihub/util.py b/kojihub/util.py deleted file mode 100644 index a874676d..00000000 --- a/kojihub/util.py +++ /dev/null @@ -1,33 +0,0 @@ -import koji - - -def convert_value(value, cast=None, message=None, - exc_type=koji.ParameterError, none_allowed=False, check_only=False): - """Cast to another type with tailored exception - - :param any value: tested object - :param type cast: To which type value should be cast - :param type exc_type: Raise this exception - :param bool none_allowed: Is None valid value? - :param check_only: Don't convert but raise an exception if type(value) != cast - - :returns any value: returns converted value - """ - if value is None: - if not none_allowed: - raise exc_type(message or f"Invalid type, expected type {cast}") - else: - return value - if check_only: - if not isinstance(value, cast): - raise exc_type(message or f"Invalid type for value '{value}': {type(value)}, " - f"expected type {cast}") - else: - try: - value = cast(value) - except (ValueError, TypeError): - raise exc_type(message or f"Invalid type for value '{value}': {type(value)}, " - f"expected type {cast}") - return value - - From 08a822dddf0ba6e969b0b501bf42d84cd38e8f65 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 29 Nov 2022 15:35:17 +0100 Subject: [PATCH 50/92] delete old scheduler log messages --- util/koji-sweep-db | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/util/koji-sweep-db b/util/koji-sweep-db index cd62346b..373bb8e3 100755 --- a/util/koji-sweep-db +++ b/util/koji-sweep-db @@ -147,6 +147,21 @@ def clean_buildroots(cursor, vacuum, test): cursor.execute("VACUUM ANALYZE buildroot") +def clean_scheduler_logs(cursor, vacuum, test, age): + clauses = f"(msg_time < NOW() - '{age:d} days'::interval)" + if options.verbose: + query = QueryProcessor(tables=["scheduler_log_messages"], + clauses=clauses, + opts={'countOnly': True}) + rows = query.execute() + print(f"Deleting {rows} scheduler log messages") + if not test: + delete = DeleteProcessor(table="scheduler_log_messages", clauses=clauses)\ + delete.execute() + if vacuum: + cursor.execute("VACUUM ANALYZE scheduler_log_messages") + + if __name__ == "__main__": global options parser = OptionParser("%prog cleans koji database") @@ -180,6 +195,9 @@ if __name__ == "__main__": parser.add_option('--scratch-builds-age', type=int, dest="scratch_age", action="store", default=730, metavar="DAYS", help="Delete scratch builds' tasks older than this (default: 2 years") + parser.add_option('--logs-age', type=int, + action="store", default=7, metavar="DAYS", + help="Delete scheduler log messages older than this (default: 7 days)") parser.add_option('--buildroots', action="store_true", help="Delete unreferenced buildroots") parser.add_option('-f', '--force', action="store_true", @@ -240,6 +258,7 @@ if __name__ == "__main__": clean_sessions(cursor, options.vacuum, options.test, options.sessions_age, options.sessions_absolute_age) clean_reservations(cursor, options.vacuum, options.test, options.reservations_age) + clean_scheduler_logs(cursor, options.vacuum, options.test, options.logs_age) if options.tag_notifications: clean_notification_tasks(cursor, options.vacuum, options.test, age=options.tag_notifications_age) From d7e6dc771cc8bee82e140cdd97d02a732662c96d Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 20 Apr 2023 18:40:52 -0400 Subject: [PATCH 51/92] flake8 --- kojihub/scheduler.py | 62 ++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 768e27c2..df1be0c5 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -1,11 +1,9 @@ import logging -import psycopg2 import time import koji from . import kojihub from .db import QueryProcessor, InsertProcessor, UpdateProcessor, db_lock -from koji.context import context convert_value = kojihub.convert_value @@ -16,8 +14,8 @@ logger = logging.getLogger('koji.scheduler') def log_db(msg, task_id=None, host_id=None): insert = InsertProcessor( - 'scheduler_log_messages', - data={'msg': msg, 'task_id': task_id, 'host_id': host_id}, + 'scheduler_log_messages', + data={'msg': msg, 'task_id': task_id, 'host_id': host_id}, ) insert.execute() @@ -88,21 +86,12 @@ def get_task_runs(taskID=None, hostID=None, active=None): query = QueryProcessor( columns=fields, aliases=aliases, tables=['scheduler_task_runs'], - # joins=['host ON host_id=host.id', 'task ON task_id=task.id'], + # joins=['host ON host_id=host.id', 'task ON task_id=task.id'], clauses=clauses, values=locals()) return query.execute() -def scheduler_map_task(taskinfo): - # map which hosts can take this task - # eventually this will involve more complex rules - q = QueryProcessor() - # select hosts matching arch and channel - hosts = q.execute() - u = InsertProcessor() - - class TaskScheduler(object): def __init__(self): @@ -174,7 +163,6 @@ class TaskScheduler(object): host['capacity'] - host['_load'] > min_avail): task['_hosts'].append(host) logger.info(f'Task {task["task_id"]}: {len(task["_hosts"])} options') - #import pdb; pdb.set_trace() for host in task['_hosts']: # demand gives us a rough measure of how much overall load is pending for the host host.setdefault('_demand', 0.0) @@ -193,7 +181,8 @@ class TaskScheduler(object): for task in self.free_tasks: min_avail = task['weight'] - self.capacity_overcommit task['_hosts'].sort(key=lambda h: h['_rank']) - logger.debug('Task %i choices: %s', task['task_id'], [(h['name'], "%(_rank).2f" %h) for h in task['_hosts']]) + logger.debug('Task %i choices: %s', task['task_id'], + [(h['name'], "%(_rank).2f" % h) for h in task['_hosts']]) for host in task['_hosts']: if (host['capacity'] - host['_load'] > min_avail and host['_ntasks'] < self.maxjobs): @@ -265,15 +254,15 @@ class TaskScheduler(object): # end stale runs update = UpdateProcessor( - 'scheduler_task_runs', - data={'active': False}, - clauses=['active = TRUE', - '(SELECT id FROM task WHERE task.id=task_id AND state IN %(states)s) IS NULL'], - values={'states': [koji.TASK_STATES[s] for s in ('OPEN', 'ASSIGNED')]}, + 'scheduler_task_runs', + data={'active': False}, + clauses=['active = TRUE', + '(SELECT id FROM task WHERE task.id=task_id AND ' + 'state IN %(states)s) IS NULL'], + values={'states': [koji.TASK_STATES[s] for s in ('OPEN', 'ASSIGNED')]}, ) update.execute() - def check_hosts(self): # sanity check ready status hosts_to_mark = [] @@ -285,15 +274,14 @@ class TaskScheduler(object): log_both('Marking host not ready', host_id=host['id']) if hosts_to_mark: - update = db.UpdateProcessor( - 'host', - data={'ready': False}, - clauses=['host_id IN %(host_ids)s'], - values={'host_ids': [h['id'] for h in hosts_to_mark]}, + update = UpdateProcessor( + 'host', + data={'ready': False}, + clauses=['host_id IN %(host_ids)s'], + values={'host_ids': [h['id'] for h in hosts_to_mark]}, ) update.execute() - def get_active_runs(self): runs = get_task_runs(active=True) runs_by_task = {} @@ -327,7 +315,7 @@ class TaskScheduler(object): columns=fields, aliases=aliases, tables=['task'], clauses=('task.state IN %(states)s', 'task.host_id IS NOT NULL', # should always be set, but... - ), + ), values=values, ) active_tasks = query.execute() @@ -420,10 +408,10 @@ class TaskScheduler(object): # mark any older runs inactive update = UpdateProcessor( - 'scheduler_task_runs', - data={'active': False}, - clauses=['task_id=%(task_id)s', 'active = TRUE'], - values={'task_id': task['task_id']}, + 'scheduler_task_runs', + data={'active': False}, + clauses=['task_id=%(task_id)s', 'active = TRUE'], + values={'task_id': task['task_id']}, ) update.execute() @@ -434,10 +422,10 @@ class TaskScheduler(object): # mark the task assigned update = UpdateProcessor( - 'task', - data={'host_id': host['id'], 'state': koji.TASK_STATES['ASSIGNED']}, - clauses=['id=%(task_id)s', 'state=%(free)s'], - values={'task_id': task['task_id'], 'free': koji.TASK_STATES['FREE']}, + 'task', + data={'host_id': host['id'], 'state': koji.TASK_STATES['ASSIGNED']}, + clauses=['id=%(task_id)s', 'state=%(free)s'], + values={'task_id': task['task_id'], 'free': koji.TASK_STATES['FREE']}, ) update.execute() From 9a4268121a2bb0628b0f2510b76f93597db09fe8 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 20 Apr 2023 18:51:32 -0400 Subject: [PATCH 52/92] pull getHostData() from pr3631 --- kojihub/scheduler.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index df1be0c5..2fc51d28 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -60,6 +60,27 @@ def get_tasks_for_host(hostID): return query.execute() +def get_host_data(hostID=None): + """Return actual builder data + + :param int hostID: Return data for given host (otherwise for all) + :returns list[dict]: list of host_id/data dicts + """ + clauses = [] + columns = ['host_id', 'data'] + if hostID is not None: + clauses.append('host_id = %(hostID)i') + query = QueryProcessor( + tables=['scheduler_host_data'], + clauses=clauses, + columns=columns, + values=locals(), + opts={'order': 'host_id'} + ) + + return query.execute() + + def get_task_runs(taskID=None, hostID=None, active=None): taskID = convert_value(taskID, cast=int, none_allowed=True) hostID = convert_value(hostID, cast=int, none_allowed=True) @@ -432,3 +453,4 @@ class TaskScheduler(object): class SchedulerExports: getTaskRuns = staticmethod(get_task_runs) + getHostData = staticmethod(get_host_data) From b19377048c8703f931a21980a5f836d0956b7515 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 20 Apr 2023 22:11:04 -0400 Subject: [PATCH 53/92] fix convert_value refs --- kojihub/scheduler.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 2fc51d28..600acfc1 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -6,9 +6,6 @@ from . import kojihub from .db import QueryProcessor, InsertProcessor, UpdateProcessor, db_lock -convert_value = kojihub.convert_value - - logger = logging.getLogger('koji.scheduler') @@ -37,7 +34,7 @@ def intlist(value): def get_tasks_for_host(hostID): """Get the tasks assigned to a given host""" - hostID = convert_value(hostID, cast=int, none_allowed=True) + hostID = kojihub.convert_value(hostID, cast=int, none_allowed=True) fields = ( ('task.id', 'id'), @@ -82,9 +79,9 @@ def get_host_data(hostID=None): def get_task_runs(taskID=None, hostID=None, active=None): - taskID = convert_value(taskID, cast=int, none_allowed=True) - hostID = convert_value(hostID, cast=int, none_allowed=True) - active = convert_value(active, cast=bool, none_allowed=True) + taskID = kojihub.convert_value(taskID, cast=int, none_allowed=True) + hostID = kojihub.convert_value(hostID, cast=int, none_allowed=True) + active = kojihub.convert_value(active, cast=bool, none_allowed=True) fields = ( ('scheduler_task_runs.id', 'id'), From 1cbd13f73816c248adfd0cb88adc2486284d629d Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 27 Apr 2023 14:43:23 -0400 Subject: [PATCH 54/92] drop unused var --- kojihub/scheduler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 600acfc1..90b62f98 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -115,7 +115,6 @@ class TaskScheduler(object): def __init__(self): self.hosts_by_bin = None self.hosts = None - self.tasks_by_bin = None self.active_tasks = None self.free_tasks = None From 5ba62a75bbec302bda6d2b2715e6bc9cbd68ac9a Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 27 Apr 2023 23:04:41 -0400 Subject: [PATCH 55/92] scheduler check_ts fragment --- kojihub/scheduler.py | 49 +++++++++++++++++++++++++++++++++++++++++++- schemas/schema.sql | 6 ++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 90b62f98..c7ad458d 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -1,3 +1,4 @@ +import json import logging import time @@ -124,12 +125,17 @@ class TaskScheduler(object): self.ready_timeout = 180 self.assign_timeout = 300 self.host_timeout = 900 + self.run_interval = 60 def run(self): if not db_lock('scheduler', wait=False): # already running elsewhere return False + if not self.check_ts(): + # already ran too recently + return False + self.get_tasks() self.get_hosts() self.check_hosts() @@ -138,6 +144,47 @@ class TaskScheduler(object): return True + def check_ts(self): + """Check the last run timestamp + + Returns True if the scheduler should run, False otherwise + """ + + # get last ts + query = QueryProcessor( + tables=['scheduler_sys_data'], + columns=['data'], + clauses=['name = %(name)s'], + values={'name': 'last_run_ts'}, + ) + last = query.singleValue(strict=False) or 0 + + now = time.time() + delta = now - last + + if delta < 0: + logger.error('Last run in the future by %i seconds', -delta) + ret = False + # update the ts so that a system time rollback doesn't keep us from running + elif delta < self.run_interval: + logger.debug('Skipping run due to run_interval setting') + # return now without updating ts + return False + else: + ret = True + + # save current ts + # XXX need an UPSERT + update = UpdateProcessor( + 'scheduler_sys_data', + clauses=['name = %(name)s'], + values={'name': 'last_run_ts'}, + data={'data': json.dumps(now)}, + ) + update.execute() + + return ret + def do_schedule(self): # debug logger.info('Running task scheduler') @@ -294,7 +341,7 @@ class TaskScheduler(object): update = UpdateProcessor( 'host', data={'ready': False}, - clauses=['host_id IN %(host_ids)s'], + clauses=['id IN %(host_ids)s'], values={'host_ids': [h['id'] for h in hosts_to_mark]}, ) update.execute() diff --git a/schemas/schema.sql b/schemas/schema.sql index 9aae6bf7..a1f2a5b9 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1010,6 +1010,12 @@ CREATE TABLE scheduler_host_data ( ) WITHOUT OIDS; +CREATE TABLE scheduler_sys_data ( + name TEXT NOT NULL PRIMARY KEY, + data JSONB +) WITHOUT OIDS; + + CREATE TABLE scheduler_map ( id SERIAL NOT NULL PRIMARY KEY, task_id INTEGER REFERENCES task (id) NOT NULL, From 35952294258d1316dbd3726ee5995e3d060fbb84 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Fri, 28 Apr 2023 15:02:25 -0400 Subject: [PATCH 56/92] handle first time case for last_run_ts --- kojihub/scheduler.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index c7ad458d..0d7f9283 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -174,14 +174,20 @@ class TaskScheduler(object): ret = True # save current ts - # XXX need an UPSERT update = UpdateProcessor( 'scheduler_sys_data', clauses=['name = %(name)s'], values={'name': 'last_run_ts'}, data={'data': json.dumps(now)}, ) - update.execute() + chk = update.execute() + if not chk: + # hasn't been defined yet + insert = InsertProcessor( + 'scheduler_sys_data', + data={'name': 'last_run_ts', 'data': json.dumps(now)}, + ) + insert.execute() return ret From 58a5e94b208188fa0fc3170dd2850daf12dbee23 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Tue, 2 May 2023 11:41:23 -0400 Subject: [PATCH 57/92] ... --- kojihub/scheduler.py | 2 +- schemas/schema.sql | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 0d7f9283..68d17dc6 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -136,6 +136,7 @@ class TaskScheduler(object): # already ran too recently return False + logger.info('Running task scheduler') self.get_tasks() self.get_hosts() self.check_hosts() @@ -193,7 +194,6 @@ class TaskScheduler(object): def do_schedule(self): # debug - logger.info('Running task scheduler') logger.info(f'Hosts: {len(self.hosts)}') logger.info(f'Free tasks: {len(self.free_tasks)}') logger.info(f'Active tasks: {len(self.active_tasks)}') diff --git a/schemas/schema.sql b/schemas/schema.sql index a1f2a5b9..6863da05 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1027,6 +1027,16 @@ CREATE TABLE scheduler_map ( ) WITHOUT OIDS; +CREATE TABLE scheduler_task_refusals ( + id SERIAL NOT NULL PRIMARY KEY, + task_id INTEGER REFERENCES task (id) NOT NULL, + host_id INTEGER REFERENCES host (id) NOT NULL, + by_host BOOLEAN NOT NULL, + msg TEXT, + time TIMESTAMPTZ NOT NULL DEFAULT NOW() +) WITHOUT OIDS; + + CREATE TABLE scheduler_log_messages ( id SERIAL NOT NULL PRIMARY KEY, task_id INTEGER REFERENCES task (id), From 5d0088f9ef66a22275892b0b93b80df87691e422 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 4 May 2023 16:59:04 -0400 Subject: [PATCH 58/92] initial task refusal functions --- kojihub/scheduler.py | 46 ++++++++++++++++++++++++++++++++++++++++++++ schemas/schema.sql | 1 + 2 files changed, 47 insertions(+) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 68d17dc6..0283c26f 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -58,6 +58,52 @@ def get_tasks_for_host(hostID): return query.execute() +def set_refusal(hostID, taskID, soft=True, by_host=False, msg=''): + data = { + 'task_id': kojihub.convert_value(hostID, cast=int), + 'host_id': kojihub.convert_value(taskID, cast=int), + 'soft': kojihub.convert_value(soft, cast=bool), + 'by_host': kojihub.convert_value(by_host, cast=bool), + 'msg': kojihub.convert_value(msg, cast=str), + } + insert = InsertProcessor('scheduler_task_refusals', data=data) + insert.execute() + # note: db allows multiple entries here, but in general we shouldn't + # make very many + + +def get_task_refusals(taskID=None, hostID=None): + taskID = kojihub.convert_value(taskID, cast=int, none_allowed=True) + hostID = kojihub.convert_value(hostID, cast=int, none_allowed=True) + + fields = ( + ('scheduler_task_refusals.id', 'id'), + ('scheduler_task_refusals.task_id', 'task_id'), + ('scheduler_task_refusals.host_id', 'host_id'), + ('scheduler_task_refusals.by_host', 'by_host'), + ('scheduler_task_refusals.soft', 'soft'), + ('scheduler_task_refusals.msg', 'msg'), + # ('host.name', 'host_name'), + ("date_part('epoch', scheduler_task_refusals.time)", 'ts'), + ) + fields, aliases = zip(*fields) + + clauses = [] + if taskID is not None: + clauses.append('task_id = %(taskID)s') + if hostID is not None: + clauses.append('host_id = %(hostID)s') + + query = QueryProcessor( + columns=fields, aliases=aliases, tables=['scheduler_task_refusals'], + # joins=['host ON host_id=host.id', 'task ON task_id=task.id'], + clauses=clauses, values=locals(), + opts={'order': '-id'} + ) + + return query.execute() + + def get_host_data(hostID=None): """Return actual builder data diff --git a/schemas/schema.sql b/schemas/schema.sql index 6863da05..de97cf69 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1032,6 +1032,7 @@ CREATE TABLE scheduler_task_refusals ( task_id INTEGER REFERENCES task (id) NOT NULL, host_id INTEGER REFERENCES host (id) NOT NULL, by_host BOOLEAN NOT NULL, + soft BOOLEAN NOT NULL DEFAULT FALSE, msg TEXT, time TIMESTAMPTZ NOT NULL DEFAULT NOW() ) WITHOUT OIDS; From 1d28e0b6af6d2e927b4993956732d2278278ea2a Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 4 May 2023 20:43:02 -0400 Subject: [PATCH 59/92] QueryView fragment --- kojihub/db.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/kojihub/db.py b/kojihub/db.py index d5e29df9..50673d7c 100644 --- a/kojihub/db.py +++ b/kojihub/db.py @@ -834,6 +834,80 @@ SELECT %(col_str)s return results +class QueryView: + # abstract base class + + # subclasses should provide... + tables = [] + joins = [] + joinmap = {} + fieldmap = {} + ''' + fieldmap looks like: + + { + 'alias': + ['fullname', 'joinkey', + + } + ''' + + def __init__(self, fields, clauses, values, opts=None): + self.extra_joins = [] + tables = list(self.tables) # copy + fields = self.get_fields(fields) + fields, aliases = zip(*fields.items()) + clauses = self.get_clauses(clauses) + joins = self.get_joins() + self.query = QueryProcessor( + columns=fields, aliases=aliases, + tables=tables, joins=joins, + clauses=clauses, values=values, + opts=opts) + + def get_fields(self): + fields = {} + joins = [] + clauses = [] + + x_joins = set() + for field in self.fields: + f_info = self.fieldmap.get(field) + if f_info is None: + raise koji.ParameterError(f'Invalid field for query {field}') + fullname, joinkey = f_info + fullname = fullname or field + fields[fullname] = field + if joinkey: + x_joins.add(joinkey) + + def get_clauses(self): + pass + + def get_joins(self): + joins = list(self.joins) + seen = set() + # note we preserve the order that extra joins were added + for joinkey in self.extra_joins: + if joinkey in seen: + continue + seen.add(joinkey) + joins.append(self.joinmap[joinkey]) + return joins + + def execute(self): + return self.query.execute() + + def executeOne(self, strict=False): + return self.query.executeOne(strict=strict) + + def iterate(self): + return self.query.iterate() + + def singleValue(self, strict=True): + return self.query.singleValue(strict=strict) + + class BulkInsertProcessor(object): def __init__(self, table, data=None, columns=None, strict=True, batch=1000): """Do bulk inserts - it has some limitations compared to From 03d12044c32e9b800212e52255d16ae2ff79075e Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 4 May 2023 22:51:46 -0400 Subject: [PATCH 60/92] basic QueryView working --- kojihub/db.py | 60 +++++++++++++++++++++++++++++++------------- kojihub/scheduler.py | 19 +++++++++++++- 2 files changed, 60 insertions(+), 19 deletions(-) diff --git a/kojihub/db.py b/kojihub/db.py index 50673d7c..74f03b96 100644 --- a/kojihub/db.py +++ b/kojihub/db.py @@ -852,8 +852,9 @@ class QueryView: } ''' - def __init__(self, fields, clauses, values, opts=None): + def __init__(self, clauses=None, fields=None, opts=None): self.extra_joins = [] + self.values = {} tables = list(self.tables) # copy fields = self.get_fields(fields) fields, aliases = zip(*fields.items()) @@ -862,27 +863,50 @@ class QueryView: self.query = QueryProcessor( columns=fields, aliases=aliases, tables=tables, joins=joins, - clauses=clauses, values=values, + clauses=clauses, values=self.values, opts=opts) - def get_fields(self): - fields = {} - joins = [] - clauses = [] + def get_fields(self, fields): + fields = fields or self.fieldmap.keys() # XXX stable order - x_joins = set() - for field in self.fields: - f_info = self.fieldmap.get(field) - if f_info is None: - raise koji.ParameterError(f'Invalid field for query {field}') - fullname, joinkey = f_info - fullname = fullname or field - fields[fullname] = field - if joinkey: - x_joins.add(joinkey) + return {self.map_field(f): f for f in fields} - def get_clauses(self): - pass + def map_field(self, field): + f_info = self.fieldmap.get(field) + if f_info is None: + raise koji.ParameterError(f'Invalid field for query {field}') + fullname, joinkey = f_info + fullname = fullname or field + if joinkey: + self.extra_joins.append(joinkey) + return fullname + + def get_clauses(self, clauses): + # for now, just a very simple implementation + result = [] + clauses = clauses or [] + for n, clause in enumerate(clauses): + # TODO checks check checks + if len(clause) == 2: + # implicit operator + field, value = clause + if isinstance(value, (list, tuple)): + op = 'IN' + else: + op = '=' + elif len(clause) == 3: + field, op, value = clause + op = op.upper() + if op not in ('IN', '=', '!=', '>', '<', '>=', '<='): + raise koji.ParameterError(f'Invalid operator: {op}') + else: + raise koji.ParameterError(f'Invalid clause: {clause}') + fullname = self.map_field(field) + key = f'v_{field}_{n}' + self.values[key] = value + result.append(f'{fullname} {op} %({key})s') + + return result def get_joins(self): joins = list(self.joins) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 0283c26f..822ad6d2 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -4,7 +4,7 @@ import time import koji from . import kojihub -from .db import QueryProcessor, InsertProcessor, UpdateProcessor, db_lock +from .db import QueryProcessor, InsertProcessor, UpdateProcessor, QueryView, db_lock logger = logging.getLogger('koji.scheduler') @@ -125,6 +125,22 @@ def get_host_data(hostID=None): return query.execute() +class TaskRunsQuery(QueryView): + + tables = ['scheduler_task_runs'] + fieldmap = { + 'id': ['scheduler_task_runs.id', None], + 'task_id': ['scheduler_task_runs.task_id', None], + 'host_id': ['scheduler_task_runs.host_id', None], + 'active': ['scheduler_task_runs.active', None], + 'create_ts': ["date_part('epoch', scheduler_task_runs.create_time)", None], + } + + +def get_task_runs2(clauses=None, fields=None): + return TaskRunsQuery(clauses, fields).execute() + + def get_task_runs(taskID=None, hostID=None, active=None): taskID = kojihub.convert_value(taskID, cast=int, none_allowed=True) hostID = kojihub.convert_value(hostID, cast=int, none_allowed=True) @@ -548,4 +564,5 @@ class TaskScheduler(object): class SchedulerExports: getTaskRuns = staticmethod(get_task_runs) + getTaskRuns2 = staticmethod(get_task_runs2) getHostData = staticmethod(get_host_data) From e7e080791edf8228648afcd8074f18687a261f3f Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 4 May 2023 23:07:03 -0400 Subject: [PATCH 61/92] ... --- kojihub/db.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kojihub/db.py b/kojihub/db.py index 74f03b96..711619bb 100644 --- a/kojihub/db.py +++ b/kojihub/db.py @@ -879,6 +879,7 @@ class QueryView: fullname = fullname or field if joinkey: self.extra_joins.append(joinkey) + # duplicates removed later return fullname def get_clauses(self, clauses): From 8ea908bfb90d5ecf0212c038a8f161db8737fd3f Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sun, 7 May 2023 13:15:20 -0400 Subject: [PATCH 62/92] more QueryView --- kojihub/db.py | 14 ++++---------- kojihub/scheduler.py | 12 ++++++++++++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/kojihub/db.py b/kojihub/db.py index 711619bb..c812d3aa 100644 --- a/kojihub/db.py +++ b/kojihub/db.py @@ -842,15 +842,7 @@ class QueryView: joins = [] joinmap = {} fieldmap = {} - ''' - fieldmap looks like: - - { - 'alias': - ['fullname', 'joinkey', - - } - ''' + default_fields = () def __init__(self, clauses=None, fields=None, opts=None): self.extra_joins = [] @@ -867,7 +859,9 @@ class QueryView: opts=opts) def get_fields(self, fields): - fields = fields or self.fieldmap.keys() # XXX stable order + fields = fields or self.default_fields + if not fields or fields == '*': + fields = sorted(self.fieldmap.keys()) return {self.map_field(f): f for f in fields} diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 822ad6d2..a2fa0f78 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -128,13 +128,25 @@ def get_host_data(hostID=None): class TaskRunsQuery(QueryView): tables = ['scheduler_task_runs'] + joinmap = { + 'task': 'task ON scheduler_task_runs.task_id = task.id', + 'host': 'host ON scheduler_task_runs.host_id = host.id', + } fieldmap = { 'id': ['scheduler_task_runs.id', None], 'task_id': ['scheduler_task_runs.task_id', None], + 'method': ['task.method', 'task'], + 'state': ['task.state', 'task'], + 'owner': ['task.owner', 'task'], + 'arch': ['task.arch', 'task'], + 'channel_id': ['task.channel_id', 'task'], + 'host_name': ['host.name', 'host'], + 'host_ready': ['host.ready', 'host'], 'host_id': ['scheduler_task_runs.host_id', None], 'active': ['scheduler_task_runs.active', None], 'create_ts': ["date_part('epoch', scheduler_task_runs.create_time)", None], } + default_fields = ('id', 'task_id', 'host_id') def get_task_runs2(clauses=None, fields=None): From a577984d062c1c11b6d8968545c3e1ca07e37b96 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sun, 7 May 2023 13:24:30 -0400 Subject: [PATCH 63/92] use QueryView for get_task_runs --- kojihub/scheduler.py | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index a2fa0f78..ab7eba8e 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -146,45 +146,13 @@ class TaskRunsQuery(QueryView): 'active': ['scheduler_task_runs.active', None], 'create_ts': ["date_part('epoch', scheduler_task_runs.create_time)", None], } - default_fields = ('id', 'task_id', 'host_id') + default_fields = ('id', 'task_id', 'host_id', 'active', 'create_ts') -def get_task_runs2(clauses=None, fields=None): +def get_task_runs(clauses=None, fields=None): return TaskRunsQuery(clauses, fields).execute() -def get_task_runs(taskID=None, hostID=None, active=None): - taskID = kojihub.convert_value(taskID, cast=int, none_allowed=True) - hostID = kojihub.convert_value(hostID, cast=int, none_allowed=True) - active = kojihub.convert_value(active, cast=bool, none_allowed=True) - - fields = ( - ('scheduler_task_runs.id', 'id'), - ('scheduler_task_runs.task_id', 'task_id'), - ('scheduler_task_runs.host_id', 'host_id'), - # ('host.name', 'host_name'), - # ('task.method', 'method'), - ('scheduler_task_runs.active', 'active'), - ("date_part('epoch', scheduler_task_runs.create_time)", 'create_ts'), - ) - fields, aliases = zip(*fields) - - clauses = [] - if taskID is not None: - clauses.append('task_id = %(taskID)s') - if hostID is not None: - clauses.append('host_id = %(hostID)s') - if active is not None: - clauses.append('active = %(active)s') - - query = QueryProcessor( - columns=fields, aliases=aliases, tables=['scheduler_task_runs'], - # joins=['host ON host_id=host.id', 'task ON task_id=task.id'], - clauses=clauses, values=locals()) - - return query.execute() - - class TaskScheduler(object): def __init__(self): @@ -427,7 +395,7 @@ class TaskScheduler(object): update.execute() def get_active_runs(self): - runs = get_task_runs(active=True) + runs = get_task_runs([["active", True]]) runs_by_task = {} for run in runs: runs_by_task.setdefault(run['task_id'], []) @@ -576,5 +544,4 @@ class TaskScheduler(object): class SchedulerExports: getTaskRuns = staticmethod(get_task_runs) - getTaskRuns2 = staticmethod(get_task_runs2) getHostData = staticmethod(get_host_data) From fba5abe2edf251eea1ac9cc672f88b9596828d98 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sun, 7 May 2023 17:06:26 -0400 Subject: [PATCH 64/92] UpsertProcessor --- kojihub/db.py | 36 +++++++++++++++++++++ kojihub/scheduler.py | 75 +++++++++++++++++++++----------------------- 2 files changed, 71 insertions(+), 40 deletions(-) diff --git a/kojihub/db.py b/kojihub/db.py index c812d3aa..a7cd1cf5 100644 --- a/kojihub/db.py +++ b/kojihub/db.py @@ -444,6 +444,42 @@ class InsertProcessor(object): return _dml(str(self), self.data) +class UpsertProcessor(InsertProcessor): + """Build a basic upsert statement + + table - the table to insert into + data - a dictionary of data to insert (keys = row names) + rawdata - data to insert specified as sql expressions rather than python values + keys - the rows that are the unique keys + skip_dup - if set to true, do nothing on conflict + """ + + def __init__(self, table, data=None, rawdata=None, keys=None, skip_dup=False): + super(UpsertProcessor, self).__init__(table, data=data, rawdata=rawdata) + self.keys = keys + self.skip_dup = skip_dup + if not keys and not skip_dup: + raise ValueError('either keys or skip_dup must be set') + + def __repr__(self): + return "" % vars(self) + + def __str__(self): + insert = super(UpsertProcessor, self).__str__() + parts = [insert] + if self.skip_dup: + parts.append(' ON CONFLICT DO NOTHING') + else: + parts.append(f' ON CONFLICT ({",".join(self.keys)}) DO UPDATE SET ') + # filter out conflict keys from data + data = {k: self.data[k] for k in self.data if k not in self.keys} + rawdata = {k: self.rawdata[k] for k in self.rawdata if k not in self.keys} + assigns = [f"{key} = %({key})s" for key in data] + assigns.extend([f"{key} = ({rawdata[key]})" for key in self.rawdata]) + parts.append(', '.join(sorted(assigns))) + return ''.join(parts) + + class UpdateProcessor(object): """Build an update statement diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index ab7eba8e..6646189e 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -4,7 +4,7 @@ import time import koji from . import kojihub -from .db import QueryProcessor, InsertProcessor, UpdateProcessor, QueryView, db_lock +from .db import QueryProcessor, InsertProcessor, UpsertProcessor, UpdateProcessor, QueryView, db_lock logger = logging.getLogger('koji.scheduler') @@ -72,36 +72,34 @@ def set_refusal(hostID, taskID, soft=True, by_host=False, msg=''): # make very many -def get_task_refusals(taskID=None, hostID=None): - taskID = kojihub.convert_value(taskID, cast=int, none_allowed=True) - hostID = kojihub.convert_value(hostID, cast=int, none_allowed=True) +class TaskRefusalsQuery(QueryView): - fields = ( - ('scheduler_task_refusals.id', 'id'), - ('scheduler_task_refusals.task_id', 'task_id'), - ('scheduler_task_refusals.host_id', 'host_id'), - ('scheduler_task_refusals.by_host', 'by_host'), - ('scheduler_task_refusals.soft', 'soft'), - ('scheduler_task_refusals.msg', 'msg'), - # ('host.name', 'host_name'), - ("date_part('epoch', scheduler_task_refusals.time)", 'ts'), - ) - fields, aliases = zip(*fields) + tables = ['scheduler_task_refusals'] + joinmap = { + 'task': 'task ON scheduler_task_refusals.task_id = task.id', + 'host': 'host ON scheduler_task_refusals.host_id = host.id', + } + fieldmap = { + 'id': ['scheduler_task_refusals.id', None], + 'task_id': ['scheduler_task_refusals.task_id', None], + 'host_id': ['scheduler_task_refusals.host_id', None], + 'by_host': ['scheduler_task_refusals.by_host', None], + 'soft': ['scheduler_task_refusals.soft', None], + 'msg': ['scheduler_task_refusals.msg', None], + 'ts': ["date_part('epoch', scheduler_task_refusals.time)", None], + 'method': ['task.method', 'task'], + 'state': ['task.state', 'task'], + 'owner': ['task.owner', 'task'], + 'arch': ['task.arch', 'task'], + 'channel_id': ['task.channel_id', 'task'], + 'host_name': ['host.name', 'host'], + 'host_ready': ['host.ready', 'host'], + } + default_fields = ('id', 'task_id', 'host_id', 'by_host', 'soft', 'msg', 'ts') - clauses = [] - if taskID is not None: - clauses.append('task_id = %(taskID)s') - if hostID is not None: - clauses.append('host_id = %(hostID)s') - query = QueryProcessor( - columns=fields, aliases=aliases, tables=['scheduler_task_refusals'], - # joins=['host ON host_id=host.id', 'task ON task_id=task.id'], - clauses=clauses, values=locals(), - opts={'order': '-id'} - ) - - return query.execute() +def get_task_refusals(clauses=None, fields=None): + return TaskRefusalsQuery(clauses, fields).execute() def get_host_data(hostID=None): @@ -217,20 +215,13 @@ class TaskScheduler(object): ret = True # save current ts - update = UpdateProcessor( + upsert = UpsertProcessor( 'scheduler_sys_data', - clauses=['name = %(name)s'], - values={'name': 'last_run_ts'}, - data={'data': json.dumps(now)}, + data={'name': 'last_run_ts', + 'data': json.dumps(now)}, + keys=['name'], ) - chk = update.execute() - if not chk: - # hasn't been defined yet - insert = InsertProcessor( - 'scheduler_sys_data', - data={'name': 'last_run_ts', 'data': json.dumps(now)}, - ) - insert.execute() + upsert.execute() return ret @@ -455,6 +446,9 @@ class TaskScheduler(object): self.free_tasks = free_tasks self.active_tasks = active_tasks + def get_refusals(self): + pass + def get_hosts(self): # get hosts and bin them hosts_by_bin = {} @@ -544,4 +538,5 @@ class TaskScheduler(object): class SchedulerExports: getTaskRuns = staticmethod(get_task_runs) + getTaskRefusals = staticmethod(get_task_refusals) getHostData = staticmethod(get_host_data) From b2037c1da0366d1b3ab2a9b54872c6857ca3be1f Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sun, 7 May 2023 20:36:02 -0400 Subject: [PATCH 65/92] honor refusals --- kojihub/scheduler.py | 38 ++++++++++++++++++++++++++++++++------ schemas/schema.sql | 3 ++- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 6646189e..33b5568f 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -66,10 +66,8 @@ def set_refusal(hostID, taskID, soft=True, by_host=False, msg=''): 'by_host': kojihub.convert_value(by_host, cast=bool), 'msg': kojihub.convert_value(msg, cast=str), } - insert = InsertProcessor('scheduler_task_refusals', data=data) - insert.execute() - # note: db allows multiple entries here, but in general we shouldn't - # make very many + upsert = UpsertProcessor('scheduler_task_refusals', data=data, keys=('task_id', 'host_id')) + upsert.execute() class TaskRefusalsQuery(QueryView): @@ -164,6 +162,7 @@ class TaskScheduler(object): self.capacity_overcommit = 5 self.ready_timeout = 180 self.assign_timeout = 300 + self.soft_refusal_timeout = 900 self.host_timeout = 900 self.run_interval = 60 @@ -258,12 +257,15 @@ class TaskScheduler(object): # figure out which hosts *can* take each task # at the moment this is mostly just bin, but in the future it will be more complex + refusals = self.get_refusals() for task in self.free_tasks: task['_hosts'] = [] min_avail = min(0, task['weight'] - self.capacity_overcommit) + h_refused = refusals.get(task['task_id'], {}) for host in self.hosts_by_bin.get(task['_bin'], []): if (host['ready'] and host['_ntasks'] < self.maxjobs and - host['capacity'] - host['_load'] > min_avail): + host['capacity'] - host['_load'] > min_avail and + host['id'] not in h_refused): task['_hosts'].append(host) logger.info(f'Task {task["task_id"]}: {len(task["_hosts"])} options') for host in task['_hosts']: @@ -447,7 +449,31 @@ class TaskScheduler(object): self.active_tasks = active_tasks def get_refusals(self): - pass + """Get task refusals and clean stale entries""" + refusals = {} + cutoff_ts = time.time() - self.soft_refusal_timeout + to_drop = [] + for row in get_task_refusals(fields=('id', 'task_id', 'host_id', 'soft', 'ts', 'state')): + if ((row['soft'] and row['ts'] < cutoff_ts ) or + koji.TASK_STATES[row['state']] not in ('FREE', 'OPEN', 'ASSIGNED')): + to_drop.append(row['id']) + else: + # index by task and host + refusals.setdefault(row['task_id'], {})[row['host_id']] = row + + if to_drop: + # drop stale entries + delete = DeleteProcessor( + 'scheduler_task_refusals', + clauses=['id IN %(to_drop)s'], + values=locals(), + ) + delete.execute() + + return refusals + + def clean_refusals(self): + update = UpdateProcessor() def get_hosts(self): # get hosts and bin them diff --git a/schemas/schema.sql b/schemas/schema.sql index de97cf69..74addcc9 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1034,7 +1034,8 @@ CREATE TABLE scheduler_task_refusals ( by_host BOOLEAN NOT NULL, soft BOOLEAN NOT NULL DEFAULT FALSE, msg TEXT, - time TIMESTAMPTZ NOT NULL DEFAULT NOW() + time TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE (task_id, host_id) ) WITHOUT OIDS; From 9ef308b615b77845fb47882324124c3b51aa9524 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sun, 7 May 2023 20:53:25 -0400 Subject: [PATCH 66/92] rework host.refuseTask --- kojihub/kojihub.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index a8317bf1..b240d478 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -14405,15 +14405,22 @@ class HostExports(object): tasks = query.execute() return tasks - def refuseTask(self, task_id): + def refuseTask(self, task_id, soft=True, msg=''): + soft = convert_value(soft, cast=bool) + msg = convert_value(msg, cast=str) host = Host() host.verify() - # XXX - task = Task(task_id) - task.free(newstate=koji.TASK_STATES['REFUSED']) - return True + if task['host_id'] != host['id']: + logger.warning('Host %s refused unrelated task: %s', host['name'], task['id']) + return + state = koji.TASK_STATES[task['state']] + if state not in ('OPEN', 'ASSIGNED'): + logger.warning('Host %s refused %s task: %s', host['name'], state, task['id']) + return + scheduler.set_refusal(host['id'], task['id'], soft=soft, msg=msg, by_host=True) + task.free() def getHostTasks(self): host = Host() From 8716d8552d74a1bc46034771e7111b0c28e2b386 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sun, 7 May 2023 21:03:11 -0400 Subject: [PATCH 67/92] ... --- kojihub/kojihub.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index b240d478..da9fd205 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -14412,14 +14412,12 @@ class HostExports(object): host.verify() task = Task(task_id) - if task['host_id'] != host['id']: - logger.warning('Host %s refused unrelated task: %s', host['name'], task['id']) + tinfo = task.getInfo(strict=True) + if tinfo['host_id'] != host.id: + logger.warning('Host %s refused unrelated task: %s', host.id, tinfo['id']) return - state = koji.TASK_STATES[task['state']] - if state not in ('OPEN', 'ASSIGNED'): - logger.warning('Host %s refused %s task: %s', host['name'], state, task['id']) - return - scheduler.set_refusal(host['id'], task['id'], soft=soft, msg=msg, by_host=True) + scheduler.set_refusal(host.id, tinfo['id'], soft=soft, msg=msg, by_host=True) + # also free the task task.free() def getHostTasks(self): From 40660bbb42d1c7e8ef1b0db0b85196660c3ef86c Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sun, 7 May 2023 23:21:53 -0400 Subject: [PATCH 68/92] call host.refuseTask when host check fails --- koji/daemon.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/koji/daemon.py b/koji/daemon.py index fe712036..1d9b313f 100644 --- a/koji/daemon.py +++ b/koji/daemon.py @@ -1429,6 +1429,8 @@ class TaskManager(object): if not valid_host: self.logger.info( 'Skipping task %s (%s) due to host check', task['id'], task['method']) + if task['state'] == koji.TASK_STATES['ASSIGNED']: + self.session.host.refuseTask(task_id, soft=False, msg='failed host check') return False data = self.session.host.openTask(task['id']) if data is None: From eedcc3e82b7a49854e964fe595f84c5a5903d0f2 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Sun, 7 May 2023 23:45:20 -0400 Subject: [PATCH 69/92] query for scheduler logs --- kojihub/scheduler.py | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 33b5568f..1275d3bb 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -25,6 +25,34 @@ def log_both(msg, task_id=None, host_id=None, level=logging.INFO): log_db(msg, task_id, host_id) +class LogMessagesQuery(QueryView): + + tables = ['scheduler_log_messages'] + joinmap = { + 'task': 'task ON scheduler_log_messages.task_id = task.id', + 'host': 'host ON scheduler_log_messages.host_id = host.id', + } + fieldmap = { + 'id': ['scheduler_log_messages.id', None], + 'task_id': ['scheduler_log_messages.task_id', None], + 'host_id': ['scheduler_log_messages.host_id', None], + 'msg_ts': ["date_part('epoch', scheduler_log_messages.msg_time)", None], + 'msg': ['scheduler_log_messages.msg', None], + 'method': ['task.method', 'task'], + 'state': ['task.state', 'task'], + 'owner': ['task.owner', 'task'], + 'arch': ['task.arch', 'task'], + 'channel_id': ['task.channel_id', 'task'], + 'host_name': ['host.name', 'host'], + 'host_ready': ['host.ready', 'host'], + } + default_fields = ('id', 'task_id', 'host_id', 'msg', 'msg_ts') + + +def get_log_messages(clauses=None, fields=None): + return LogMessagesQuery(clauses, fields).execute() + + def intlist(value): """Cast value to a list of ints""" if isinstance(value, (list, tuple)): @@ -68,6 +96,7 @@ def set_refusal(hostID, taskID, soft=True, by_host=False, msg=''): } upsert = UpsertProcessor('scheduler_task_refusals', data=data, keys=('task_id', 'host_id')) upsert.execute() + log_both('Host refused task', task_id=taskID, host_id=hostID) class TaskRefusalsQuery(QueryView): @@ -472,9 +501,6 @@ class TaskScheduler(object): return refusals - def clean_refusals(self): - update = UpdateProcessor() - def get_hosts(self): # get hosts and bin them hosts_by_bin = {} @@ -566,3 +592,4 @@ class SchedulerExports: getTaskRuns = staticmethod(get_task_runs) getTaskRefusals = staticmethod(get_task_refusals) getHostData = staticmethod(get_host_data) + getLogMessages = staticmethod(get_log_messages) From 325ecffc172bbd09d7b2fda4627b1c911871a1f9 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Mon, 8 May 2023 16:10:38 -0400 Subject: [PATCH 70/92] use host.update_ts in kojiweb/hostinfo --- www/kojiweb/index.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/www/kojiweb/index.py b/www/kojiweb/index.py index 35ed162f..0e0b3ae6 100644 --- a/www/kojiweb/index.py +++ b/www/kojiweb/index.py @@ -1780,7 +1780,12 @@ def hostinfo(environ, hostID=None, userID=None): values['host'] = host values['channels'] = channels values['buildroots'] = buildroots - values['lastUpdate'] = server.getLastHostUpdate(host['id'], ts=True) + if 'update_ts' not in host: + # be nice with older hub + # TODO remove this compat workaround after a release + values['lastUpdate'] = server.getLastHostUpdate(host['id'], ts=True) + else: + values['lastUpdate'] = koji.formatTimeLong(host['update_ts']) if environ['koji.currentUser']: values['perms'] = server.getUserPerms(environ['koji.currentUser']['id']) else: From b9a7516943efdd3f9a10a4307969fa65f12f9514 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Mon, 8 May 2023 16:50:35 -0400 Subject: [PATCH 71/92] basic scheduler log cli --- cli/koji_cli/commands.py | 50 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/cli/koji_cli/commands.py b/cli/koji_cli/commands.py index 770083bb..a4619270 100644 --- a/cli/koji_cli/commands.py +++ b/cli/koji_cli/commands.py @@ -7955,3 +7955,53 @@ def anon_handle_scheduler_info(goptions, session, args): print(host_data[0]['data']) else: print('-') + + +def handle_scheduler_logs(goptions, session, args): + "[monitor] Query scheduler logs" + usage = "usage: %prog scheduler-logs " + parser = OptionParser(usage=get_usage_str(usage)) + parser.add_option("--task", type="int", action="store", + help="Filter by task ID") + parser.add_option("--host", type="str", action="store", + help="Filter by host (name/ID)") + parser.add_option("--from", type="float", action="store", dest="from_ts", + help="Logs from given timestamp") + parser.add_option("--to", type="float", action="store", dest="to_ts", + help="Logs until given timestamp (included)") + (options, args) = parser.parse_args(args) + if len(args) != 0: + parser.error("There are no arguments for this command") + + kwargs = {} + clauses = [] + if options.task: + clauses.append(['task_id', options.task]) + if options.host: + try: + host_id = int(options.host) + except ValueError: + host_id = session.getHost(options.host)['id'] + clauses.append(['host_id', host_id]) + if options.from_ts: + clauses.append(['msg_ts', '>=', options.from_ts]) + if options.to_ts: + clauses.append(['msg_ts', '<', options.to_ts]) + + logs = session.scheduler.getLogMessages(clauses, fields=('task_id', 'host_id', 'host_name', 'msg_ts', 'msg')) + for log in logs: + log['time'] = time.asctime(time.localtime(log['msg_ts'])) + + mask = ("%(task_id)s\t%(host_name)s\t%(time)s\t%(msg)s") + if not goptions.quiet: + h = mask % { + 'task_id': 'Task', + 'host_name': 'Host', + 'time': 'Time', + 'msg': 'Message', + } + print(h) + print('-' * len(h)) + + for log in logs: + print(mask % log) From fc1f706ae91c57e3fa86bc121c8825e906cfd467 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Tue, 9 May 2023 10:53:56 -0400 Subject: [PATCH 72/92] outer joins for log query --- kojihub/scheduler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 1275d3bb..eb91f82d 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -29,8 +29,9 @@ class LogMessagesQuery(QueryView): tables = ['scheduler_log_messages'] joinmap = { - 'task': 'task ON scheduler_log_messages.task_id = task.id', - 'host': 'host ON scheduler_log_messages.host_id = host.id', + # outer joins because these fields can be null + 'task': 'LEFT JOIN task ON scheduler_log_messages.task_id = task.id', + 'host': 'LEFT JOIN host ON scheduler_log_messages.host_id = host.id', } fieldmap = { 'id': ['scheduler_log_messages.id', None], From 81c6cde17a24a9cf9fa46a625457c96c001f9e7f Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Wed, 10 May 2023 17:28:08 -0400 Subject: [PATCH 73/92] drop unused task states --- koji/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/koji/__init__.py b/koji/__init__.py index 6f1aa837..4d0e7b25 100644 --- a/koji/__init__.py +++ b/koji/__init__.py @@ -194,8 +194,6 @@ TASK_STATES = Enum(( 'CANCELED', 'ASSIGNED', 'FAILED', - 'SCHEDULED', - 'REFUSED', )) BUILD_STATES = Enum(( From edd5abad4080dc774cb61930cfa89930b935363d Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Mon, 22 May 2023 16:25:07 -0400 Subject: [PATCH 74/92] fix import --- kojihub/scheduler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index eb91f82d..798f6b0e 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -4,7 +4,8 @@ import time import koji from . import kojihub -from .db import QueryProcessor, InsertProcessor, UpsertProcessor, UpdateProcessor, QueryView, db_lock +from .db import QueryProcessor, InsertProcessor, UpsertProcessor, UpdateProcessor, \ + DeleteProcessor, QueryView, db_lock logger = logging.getLogger('koji.scheduler') From fab35bedcbab8568ad87034e345096215587b64b Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 23 May 2023 11:33:19 +0200 Subject: [PATCH 75/92] fix schema --- schemas/schema.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schemas/schema.sql b/schemas/schema.sql index 74addcc9..7bed75f1 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1000,7 +1000,7 @@ CREATE INDEX scheduler_task_runs_create_time ON scheduler_task_runs(create_time) CREATE TABLE scheduler_queue ( task_id INTEGER REFERENCES task (id) NOT NULL PRIMARY KEY, groomed BOOLEAN NOT NULL DEFAULT FALSE, - run_id INTEGER REFERENCES scheduler_task_run(id) + run_id INTEGER REFERENCES scheduler_task_runs(id) ) WITHOUT OIDS; From 9b62b2cbea1f6d85129ceeac32054e2cea15bf6d Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 23 May 2023 11:33:36 +0200 Subject: [PATCH 76/92] fix scheduler-info --- cli/koji_cli/commands.py | 41 +++++++++++++++++++++++++++------------- kojihub/scheduler.py | 2 ++ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/cli/koji_cli/commands.py b/cli/koji_cli/commands.py index a4619270..92e7fac4 100644 --- a/cli/koji_cli/commands.py +++ b/cli/koji_cli/commands.py @@ -7901,6 +7901,12 @@ def anon_handle_repoinfo(goptions, session, args): warn("--buildroots option is available with hub 1.33 or newer") +def _format_ts(ts): + if ts: + return time.strftime("%y-%m-%d %H:%M:%S", time.localtime(ts)) + else: + return '' + def anon_handle_scheduler_info(goptions, session, args): """[monitor] Show information about scheduling""" usage = "usage: %prog schedulerinfo [options]" @@ -7908,8 +7914,8 @@ def anon_handle_scheduler_info(goptions, session, args): parser.add_option("-t", "--task", action="store", type=int, default=None, help="Limit data to given task id") parser.add_option("--host", action="store", default=None, - help="Limit data to given builder (name/id)") - parser.add_option("--state", action="store", type='str', default=None, + help="Limit data to given builder id") + parser.add_option("--state", action="store", type='choice', default=None, choices=[x for x in koji.TASK_STATES.keys()], help="Limit data to task state") (options, args) = parser.parse_args(args) @@ -7925,27 +7931,36 @@ def anon_handle_scheduler_info(goptions, session, args): except ValueError: host_id = session.getHost(options.host, strict=True)['id'] - if options.state: - state = koji.TASK_STATES[options.state] - else: - state = None - # get the data - runs = session.scheduler.getTaskRuns(taskID=options.task, hostID=host_id, state=state) - mask = '%(task_id)s\t%(host_id)s\t%(state)s\t%(create_time)s\t%(start_time)s\t%(end_time)s' + clauses = [] + if options.task: + clauses.append(('task_id', options.task)) + if options.host: + clauses.append(('host_id', options.host)) + if options.state: + clauses.append(('state', koji.TASK_STATES[options.state])) + + runs = session.scheduler.getTaskRuns( + clauses=clauses, + fields=('task_id', 'host_name', 'state', 'create_ts', 'start_ts', 'completion_ts') + ) + mask = '%(task_id)-9s %(host_name)-20s %(state)-7s ' \ + '%(create_ts)-17s %(start_ts)-17s %(completion_ts)-17s' if not goptions.quiet: header = mask % { 'task_id': 'Task', 'host_name': 'Host', 'state': 'State', - 'create_time': 'Created', - 'start_time': 'Started', - 'end_time': 'Ended' + 'create_ts': 'Created', + 'start_ts': 'Started', + 'completion_ts': 'Ended', } print(header) print('-' * len(header)) for run in runs: - run['state'] = koji.TASK_STATES[runs['state']] + run['state'] = koji.TASK_STATES[run['state']] + for ts in ('create_ts', 'start_ts', 'completion_ts'): + run[ts] = _format_ts(run[ts]) print(mask % run) if host_id: diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 798f6b0e..08a572b0 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -172,6 +172,8 @@ class TaskRunsQuery(QueryView): 'host_id': ['scheduler_task_runs.host_id', None], 'active': ['scheduler_task_runs.active', None], 'create_ts': ["date_part('epoch', scheduler_task_runs.create_time)", None], + 'start_ts': ["date_part('epoch', task.start_time)", 'task'], + 'completion_ts': ["date_part('epoch', task.completion_time)", 'task'], } default_fields = ('id', 'task_id', 'host_id', 'active', 'create_ts') From 787ae18bab385ed303a4e5bfb4f8d1ad5785a561 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 23 May 2023 12:15:48 +0200 Subject: [PATCH 77/92] use UpsertProcessor --- kojihub/kojihub.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index da9fd205..bd34facc 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -83,6 +83,7 @@ from .db import ( # noqa: F401 QueryProcessor, Savepoint, UpdateProcessor, + UpsertProcessor, _applyQueryOpts, _dml, _fetchSingle, @@ -14370,19 +14371,12 @@ class HostExports(object): """ host = Host() host.verify() - clauses = ['host_id = %(host_id)i'] - values = {'host_id': host.id} - table = 'scheduler_host_data' - query = QueryProcessor(tables=[table], clauses=clauses, values=values, - opts={'countOnly': True}) - if query.singleValue() > 0: - update = UpdateProcessor(table=table, data={'data': hostdata}, - clauses=clauses, values=values) - update.execute() - else: - insert = InsertProcessor(table=table, data={'data': hostdata}, - clauses=clauses, values=values) - insert.execute() + upsert = UpsertProcessor( + table='scheduler_host_data', + keys=['host_id'], + data={'host_id': host.id, 'data': hostdata}, + ) + upsert.execute() def getTasks(self): host = Host() From d2bbd2ce66f072af802cbebd35d8ad77487bc9c4 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 23 May 2023 12:36:59 +0200 Subject: [PATCH 78/92] use Task interface for correct state hooks --- kojihub/scheduler.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 08a572b0..89d89e01 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -583,13 +583,8 @@ class TaskScheduler(object): insert.execute() # mark the task assigned - update = UpdateProcessor( - 'task', - data={'host_id': host['id'], 'state': koji.TASK_STATES['ASSIGNED']}, - clauses=['id=%(task_id)s', 'state=%(free)s'], - values={'task_id': task['task_id'], 'free': koji.TASK_STATES['FREE']}, - ) - update.execute() + task = kojihub.Task(task['task_id']) + task.assign(host['id']) class SchedulerExports: From 0458e0e445a4c4d30a77567940109647c8fcda1f Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 1 Jun 2023 18:54:45 -0400 Subject: [PATCH 79/92] allow admins to trigger scheduler run with an api call --- kojihub/scheduler.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 89d89e01..a207a5a6 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -3,6 +3,7 @@ import logging import time import koji +from koji.context import context from . import kojihub from .db import QueryProcessor, InsertProcessor, UpsertProcessor, UpdateProcessor, \ DeleteProcessor, QueryView, db_lock @@ -199,12 +200,12 @@ class TaskScheduler(object): self.host_timeout = 900 self.run_interval = 60 - def run(self): - if not db_lock('scheduler', wait=False): + def run(self, force=False): + if not db_lock('scheduler', wait=force): # already running elsewhere return False - if not self.check_ts(): + if not force and not self.check_ts(): # already ran too recently return False @@ -592,3 +593,14 @@ class SchedulerExports: getTaskRefusals = staticmethod(get_task_refusals) getHostData = staticmethod(get_host_data) getLogMessages = staticmethod(get_log_messages) + + def doRun(self, force=False): + """Run the scheduler + + This is a debug tool and should not normally be needed. + Scheduler runs are regularly triggered by builder checkins + """ + + force = kojihub.convert_value(force, cast=bool) + context.session.assertPerm('admin') + return TaskScheduler().run(force=force) From 9c8a95f3aa6f0cceedb54dd164c59b92a2c2e740 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Thu, 1 Jun 2023 18:55:21 -0400 Subject: [PATCH 80/92] drop some test code --- kojihub/kojihub.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index bd34facc..94cbf278 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -10047,15 +10047,6 @@ def _delete_event_id(): class RootExports(object): '''Contains functions that are made available via XMLRPC''' - def TEST(self, fail=False): - scheduler.TaskScheduler().run() - if fail: - raise Exception('DEBUG') - - def TEST2(self, hostID=1): - return Host(hostID).getLoadData() - # return scheduler.get_tasks_for_host(hostID) - def restartHosts(self, priority=5, options=None): """Spawns restartHosts task From 0d6e5091a9d2d75dd6f90bead0e03c40f10b4386 Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Wed, 7 Jun 2023 14:12:47 -0400 Subject: [PATCH 81/92] unify getTasks and getLoadData code --- kojihub/kojihub.py | 34 +++++++++------------------------- kojihub/scheduler.py | 11 +++++++++-- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/kojihub/kojihub.py b/kojihub/kojihub.py index 94cbf278..5c60dda5 100644 --- a/kojihub/kojihub.py +++ b/kojihub/kojihub.py @@ -14268,16 +14268,16 @@ class Host(object): def getLoadData(self): """Get load balancing data - This data is relatively small and the necessary load analysis is - relatively complex, so we let the host machines crunch it.""" + This call is here for backwards compatibility. + Originally, it returned broad information about all hosts and tasks so that individual + hosts could make informed decisions about which task to take. + + Now it presents only data for the calling host and the tasks that have been assigned to + it""" + host = get_host(self.id) host['channels'] = [c['id'] for c in list_channels(hostID=self.id)] - tasks = scheduler.get_tasks_for_host(hostID=self.id) - if not tasks: - # try running scheduler - if scheduler.TaskScheduler().run(): - # check again - tasks = scheduler.get_tasks_for_host(hostID=self.id) + tasks = scheduler.get_tasks_for_host(hostID=self.id, retry=True) return [[host], tasks] def isEnabled(self): @@ -14372,23 +14372,7 @@ class HostExports(object): def getTasks(self): host = Host() host.verify() - - query = QueryProcessor( - tables=['scheduler_task_runs'], - clauses=[ - 'host_id = %(host_id)s', - 'state in %(states)s' - ], - values={ - 'host_id': host.id, - 'states': [ - koji.TASK_STATES['SCHEDULED'], - koji.TASK_STATES['ASSIGNED'], - ], - } - ) - tasks = query.execute() - return tasks + return scheduler.get_tasks_for_host(hostID=host.id, retry=True) def refuseTask(self, task_id, soft=True, msg=''): soft = convert_value(soft, cast=bool) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index a207a5a6..db535cbf 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -64,7 +64,7 @@ def intlist(value): return [int(value)] -def get_tasks_for_host(hostID): +def get_tasks_for_host(hostID, retry=True): """Get the tasks assigned to a given host""" hostID = kojihub.convert_value(hostID, cast=int, none_allowed=True) @@ -86,7 +86,14 @@ def get_tasks_for_host(hostID): values={'hostID': hostID, 'assigned': koji.TASK_STATES['ASSIGNED']}, ) - return query.execute() + tasks = query.execute() + + if not tasks and retry: + # run scheduler and try again + TaskScheduler().run() + tasks = query.execute() + + return tasks def set_refusal(hostID, taskID, soft=True, by_host=False, msg=''): From 7d892964de62fe6e14de8d3c228804e5dcc7bf6c Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Wed, 28 Jun 2023 00:21:16 -0400 Subject: [PATCH 82/92] drop unused code --- kojihub/scheduler.py | 8 -------- schemas/schema.sql | 18 ------------------ 2 files changed, 26 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index db535cbf..33fd7d2c 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -56,14 +56,6 @@ def get_log_messages(clauses=None, fields=None): return LogMessagesQuery(clauses, fields).execute() -def intlist(value): - """Cast value to a list of ints""" - if isinstance(value, (list, tuple)): - return [int(n) for n in value] - else: - return [int(value)] - - def get_tasks_for_host(hostID, retry=True): """Get the tasks assigned to a given host""" hostID = kojihub.convert_value(hostID, cast=int, none_allowed=True) diff --git a/schemas/schema.sql b/schemas/schema.sql index 7bed75f1..e58ad50c 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -997,13 +997,6 @@ CREATE INDEX scheduler_task_runs_host ON scheduler_task_runs(host_id); CREATE INDEX scheduler_task_runs_create_time ON scheduler_task_runs(create_time); -CREATE TABLE scheduler_queue ( - task_id INTEGER REFERENCES task (id) NOT NULL PRIMARY KEY, - groomed BOOLEAN NOT NULL DEFAULT FALSE, - run_id INTEGER REFERENCES scheduler_task_runs(id) -) WITHOUT OIDS; - - CREATE TABLE scheduler_host_data ( host_id INTEGER REFERENCES host (id) PRIMARY KEY, data JSONB @@ -1016,17 +1009,6 @@ CREATE TABLE scheduler_sys_data ( ) WITHOUT OIDS; -CREATE TABLE scheduler_map ( - id SERIAL NOT NULL PRIMARY KEY, - task_id INTEGER REFERENCES task (id) NOT NULL, - host_id INTEGER REFERENCES host (id) NOT NULL, - runnable BOOLEAN NOT NULL, - refused BOOLEAN NOT NULL, - msg TEXT, - check_time TIMESTAMPTZ NOT NULL DEFAULT NOW() -) WITHOUT OIDS; - - CREATE TABLE scheduler_task_refusals ( id SERIAL NOT NULL PRIMARY KEY, task_id INTEGER REFERENCES task (id) NOT NULL, From 3a1ee5cddbb70f21a587f7fc58fc049b29a9d79a Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Wed, 28 Jun 2023 00:33:29 -0400 Subject: [PATCH 83/92] fix unit tests get_ready_hosts function was dropped --- tests/test_hub/test_get_ready_hosts.py | 64 -------------------------- 1 file changed, 64 deletions(-) delete mode 100644 tests/test_hub/test_get_ready_hosts.py diff --git a/tests/test_hub/test_get_ready_hosts.py b/tests/test_hub/test_get_ready_hosts.py deleted file mode 100644 index 7ae8843f..00000000 --- a/tests/test_hub/test_get_ready_hosts.py +++ /dev/null @@ -1,64 +0,0 @@ -import kojihub -import mock -import unittest - -QP = kojihub.QueryProcessor - - -class TestGetReadyHosts(unittest.TestCase): - - def setUp(self): - self.maxDiff = None - self.QueryProcessor = mock.patch('kojihub.kojihub.QueryProcessor', - side_effect=self.getQuery).start() - self.queries = [] - self.query_execute = mock.MagicMock() - - def tearDown(self): - mock.patch.stopall() - - def getQuery(self, *args, **kwargs): - query = QP(*args, **kwargs) - query.execute = self.query_execute - self.queries.append(query) - return query - - def test_valid(self): - hosts = [{'host.id': 1, 'name': 'hostname', 'arches': 'arch123', 'task_load': 0, - 'capacity': 3}, - {'host.id': 2, 'name': 'hostname-2', 'arches': 'arch123', 'task_load': 0, - 'capacity': 3}] - expected_res = [{'host.id': 1, 'name': 'hostname', 'arches': 'arch123', 'task_load': 0, - 'capacity': 3, 'channels': [1]}, - {'host.id': 2, 'name': 'hostname-2', 'arches': 'arch123', 'task_load': 0, - 'capacity': 3, 'channels': [2, 3]} - ] - self.query_execute.side_effect = [hosts, [{'channel_id': 1}], - [{'channel_id': 2}, {'channel_id': 3}]] - result = kojihub.get_ready_hosts() - self.assertEqual(result, expected_res) - self.assertEqual(len(self.queries), 3) - - query = self.queries[0] - self.assertEqual(query.tables, ['host']) - self.assertEqual(query.joins, ['sessions USING (user_id)', - 'host_config ON host.id = host_config.host_id']) - self.assertEqual(query.clauses, ['active IS TRUE', 'enabled IS TRUE', 'expired IS FALSE', - 'master IS NULL', 'ready IS TRUE', - "sessions.update_time > NOW() - '5 minutes'::interval"]) - self.assertEqual(query.values, {}) - self.assertEqual(query.columns, ['arches', 'capacity', 'host.id', 'name', 'task_load']) - - query = self.queries[1] - self.assertEqual(query.tables, ['host_channels']) - self.assertEqual(query.joins, ['channels ON host_channels.channel_id = channels.id']) - self.assertEqual(query.clauses, ['active IS TRUE', 'enabled IS TRUE', 'host_id=%(id)s']) - self.assertEqual(query.values, hosts[0]) - self.assertEqual(query.columns, ['channel_id']) - - query = self.queries[2] - self.assertEqual(query.tables, ['host_channels']) - self.assertEqual(query.joins, ['channels ON host_channels.channel_id = channels.id']) - self.assertEqual(query.clauses, ['active IS TRUE', 'enabled IS TRUE', 'host_id=%(id)s']) - self.assertEqual(query.values, hosts[1]) - self.assertEqual(query.columns, ['channel_id']) From e649e19be5d53de683f4932d3a661ca38302167f Mon Sep 17 00:00:00 2001 From: Mike McLean Date: Wed, 28 Jun 2023 00:38:23 -0400 Subject: [PATCH 84/92] unit test --- tests/test_cli/data/list-commands.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_cli/data/list-commands.txt b/tests/test_cli/data/list-commands.txt index c434b680..ed8a6567 100644 --- a/tests/test_cli/data/list-commands.txt +++ b/tests/test_cli/data/list-commands.txt @@ -136,6 +136,8 @@ monitor commands: edit-notification Edit user's notification list-notifications List user's notifications and blocks remove-notification Remove user's notifications + scheduler-info Show information about scheduling + scheduler-logs Query scheduler logs unblock-notification Unblock user's notification wait-repo Wait for a repo to be regenerated watch-logs Watch logs in realtime From 479a46a4179144582c87fcbc022c6f9086768c71 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Thu, 3 Aug 2023 13:29:43 +0200 Subject: [PATCH 85/92] schema upgrade --- docs/schema-upgrade-1.33-1.34.sql | 49 +++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 docs/schema-upgrade-1.33-1.34.sql diff --git a/docs/schema-upgrade-1.33-1.34.sql b/docs/schema-upgrade-1.33-1.34.sql new file mode 100644 index 00000000..ad29d0b4 --- /dev/null +++ b/docs/schema-upgrade-1.33-1.34.sql @@ -0,0 +1,49 @@ +-- upgrade script to migrate the Koji database schema +-- from version 1.33 to 1.34 + +BEGIN; + -- scheduler tables + CREATE TABLE scheduler_task_runs ( + id SERIAL NOT NULL PRIMARY KEY, + task_id INTEGER REFERENCES task (id) NOT NULL, + host_id INTEGER REFERENCES host (id) NOT NULL, + active BOOLEAN NOT NULL DEFAULT TRUE, + create_time TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) WITHOUT OIDS; + CREATE INDEX scheduler_task_runs_task ON scheduler_task_runs(task_id); + CREATE INDEX scheduler_task_runs_host ON scheduler_task_runs(host_id); + CREATE INDEX scheduler_task_runs_create_time ON scheduler_task_runs(create_time); + + + CREATE TABLE scheduler_host_data ( + host_id INTEGER REFERENCES host (id) PRIMARY KEY, + data JSONB + ) WITHOUT OIDS; + + + CREATE TABLE scheduler_sys_data ( + name TEXT NOT NULL PRIMARY KEY, + data JSONB + ) WITHOUT OIDS; + + + CREATE TABLE scheduler_task_refusals ( + id SERIAL NOT NULL PRIMARY KEY, + task_id INTEGER REFERENCES task (id) NOT NULL, + host_id INTEGER REFERENCES host (id) NOT NULL, + by_host BOOLEAN NOT NULL, + soft BOOLEAN NOT NULL DEFAULT FALSE, + msg TEXT, + time TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE (task_id, host_id) + ) WITHOUT OIDS; + + + CREATE TABLE scheduler_log_messages ( + id SERIAL NOT NULL PRIMARY KEY, + task_id INTEGER REFERENCES task (id), + host_id INTEGER REFERENCES host (id), + msg_time TIMESTAMPTZ NOT NULL DEFAULT NOW(), + msg TEXT NOT NULL + ) WITHOUT OIDS; +COMMIT; From f66a5407e84fa8e27b1f726059a0ea61fb07967a Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Thu, 3 Aug 2023 13:31:00 +0200 Subject: [PATCH 86/92] default values --- kojihub/scheduler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 33fd7d2c..e8fb9b0f 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -185,10 +185,10 @@ def get_task_runs(clauses=None, fields=None): class TaskScheduler(object): def __init__(self): - self.hosts_by_bin = None - self.hosts = None - self.active_tasks = None - self.free_tasks = None + self.hosts_by_bin = {} + self.hosts = {} + self.active_tasks = [] + self.free_tasks = [] # TODO these things need proper config self.maxjobs = 15 # XXX From baa432303f3f73071a133da3ad03244807b4d7f1 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 15 Aug 2023 10:56:45 +0200 Subject: [PATCH 87/92] remove typo --- util/koji-sweep-db | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/koji-sweep-db b/util/koji-sweep-db index 373bb8e3..8c575154 100755 --- a/util/koji-sweep-db +++ b/util/koji-sweep-db @@ -156,7 +156,7 @@ def clean_scheduler_logs(cursor, vacuum, test, age): rows = query.execute() print(f"Deleting {rows} scheduler log messages") if not test: - delete = DeleteProcessor(table="scheduler_log_messages", clauses=clauses)\ + delete = DeleteProcessor(table="scheduler_log_messages", clauses=[clauses]) delete.execute() if vacuum: cursor.execute("VACUUM ANALYZE scheduler_log_messages") From bd45f7976acc21c50382ab65ad588ac47c187a65 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 15 Aug 2023 10:57:45 +0200 Subject: [PATCH 88/92] unify clauses handling --- util/koji-sweep-db | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/util/koji-sweep-db b/util/koji-sweep-db index 8c575154..11a759dc 100755 --- a/util/koji-sweep-db +++ b/util/koji-sweep-db @@ -11,17 +11,17 @@ from kojihub.db import DeleteProcessor, QueryProcessor, BulkInsertProcessor def clean_sessions(cursor, vacuum, test, age, absolute): - clauses = f"(update_time < NOW() - '{age:d} days'::interval)" + clause = f"(update_time < NOW() - '{age:d} days'::interval)" if absolute is not None: - clauses += f"OR (start_time < NOW() - '{absolute:d} days'::interval)" + clause += f"OR (start_time < NOW() - '{absolute:d} days'::interval)" if options.verbose: - query = QueryProcessor(tables=['sessions'], clauses=[clauses], opts={'countOnly': True}) + query = QueryProcessor(tables=['sessions'], clauses=[clause], opts={'countOnly': True}) rows = query.execute() print(f"Deleting {rows} sessions") if not test: - delete = DeleteProcessor(table='sessions', clauses=[clauses]) + delete = DeleteProcessor(table='sessions', clauses=[clause]) delete.execute() if vacuum: cursor.execute("VACUUM ANALYZE sessions") @@ -148,7 +148,7 @@ def clean_buildroots(cursor, vacuum, test): def clean_scheduler_logs(cursor, vacuum, test, age): - clauses = f"(msg_time < NOW() - '{age:d} days'::interval)" + clauses = [f"(msg_time < NOW() - '{age:d} days'::interval)"] if options.verbose: query = QueryProcessor(tables=["scheduler_log_messages"], clauses=clauses, @@ -156,7 +156,7 @@ def clean_scheduler_logs(cursor, vacuum, test, age): rows = query.execute() print(f"Deleting {rows} scheduler log messages") if not test: - delete = DeleteProcessor(table="scheduler_log_messages", clauses=[clauses]) + delete = DeleteProcessor(table="scheduler_log_messages", clauses=clauses) delete.execute() if vacuum: cursor.execute("VACUUM ANALYZE scheduler_log_messages") From 410b252d7707ac9e987cf7e1fcfc1b9e74b6018c Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 15 Aug 2023 11:00:33 +0200 Subject: [PATCH 89/92] missing lock in db --- docs/schema-upgrade-1.33-1.34.sql | 2 ++ schemas/schema.sql | 1 + 2 files changed, 3 insertions(+) diff --git a/docs/schema-upgrade-1.33-1.34.sql b/docs/schema-upgrade-1.33-1.34.sql index ad29d0b4..ac028939 100644 --- a/docs/schema-upgrade-1.33-1.34.sql +++ b/docs/schema-upgrade-1.33-1.34.sql @@ -46,4 +46,6 @@ BEGIN; msg_time TIMESTAMPTZ NOT NULL DEFAULT NOW(), msg TEXT NOT NULL ) WITHOUT OIDS; + + INSERT INTO locks(name) VALUES('scheduler'); COMMIT; diff --git a/schemas/schema.sql b/schemas/schema.sql index e58ad50c..a007b2cb 100644 --- a/schemas/schema.sql +++ b/schemas/schema.sql @@ -1035,5 +1035,6 @@ CREATE TABLE locks ( name TEXT NOT NULL PRIMARY KEY ) WITHOUT OIDS; INSERT INTO locks(name) VALUES('protonmsg-plugin'); +INSERT INTO locks(name) VALUES('scheduler'); COMMIT WORK; From 0df86294c4a9de54b47c84493f1073e06cba6e61 Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 15 Aug 2023 11:05:03 +0200 Subject: [PATCH 90/92] wrong variable --- koji/daemon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koji/daemon.py b/koji/daemon.py index 1d9b313f..a7a9561f 100644 --- a/koji/daemon.py +++ b/koji/daemon.py @@ -1430,7 +1430,7 @@ class TaskManager(object): self.logger.info( 'Skipping task %s (%s) due to host check', task['id'], task['method']) if task['state'] == koji.TASK_STATES['ASSIGNED']: - self.session.host.refuseTask(task_id, soft=False, msg='failed host check') + self.session.host.refuseTask(task['id'], soft=False, msg='failed host check') return False data = self.session.host.openTask(task['id']) if data is None: From 055fae4176ef9d1c2e18a2b4acd890dd28303f0e Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 15 Aug 2023 11:06:29 +0200 Subject: [PATCH 91/92] flake8 fixes --- cli/koji_cli/commands.py | 5 +++-- kojihub/scheduler.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cli/koji_cli/commands.py b/cli/koji_cli/commands.py index 92e7fac4..3cca38f1 100644 --- a/cli/koji_cli/commands.py +++ b/cli/koji_cli/commands.py @@ -7907,6 +7907,7 @@ def _format_ts(ts): else: return '' + def anon_handle_scheduler_info(goptions, session, args): """[monitor] Show information about scheduling""" usage = "usage: %prog schedulerinfo [options]" @@ -7988,7 +7989,6 @@ def handle_scheduler_logs(goptions, session, args): if len(args) != 0: parser.error("There are no arguments for this command") - kwargs = {} clauses = [] if options.task: clauses.append(['task_id', options.task]) @@ -8003,7 +8003,8 @@ def handle_scheduler_logs(goptions, session, args): if options.to_ts: clauses.append(['msg_ts', '<', options.to_ts]) - logs = session.scheduler.getLogMessages(clauses, fields=('task_id', 'host_id', 'host_name', 'msg_ts', 'msg')) + logs = session.scheduler.getLogMessages(clauses, fields=('task_id', 'host_id', 'host_name', + 'msg_ts', 'msg')) for log in logs: log['time'] = time.asctime(time.localtime(log['msg_ts'])) diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index e8fb9b0f..4ed2ea0e 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -487,7 +487,7 @@ class TaskScheduler(object): cutoff_ts = time.time() - self.soft_refusal_timeout to_drop = [] for row in get_task_refusals(fields=('id', 'task_id', 'host_id', 'soft', 'ts', 'state')): - if ((row['soft'] and row['ts'] < cutoff_ts ) or + if ((row['soft'] and row['ts'] < cutoff_ts) or koji.TASK_STATES[row['state']] not in ('FREE', 'OPEN', 'ASSIGNED')): to_drop.append(row['id']) else: From 1b219ee9eacc51891ca683437752b8d9cd3af8dd Mon Sep 17 00:00:00 2001 From: Tomas Kopecek Date: Tue, 15 Aug 2023 16:50:12 +0200 Subject: [PATCH 92/92] configurable scheduler parameters --- kojihub/kojixmlrpc.py | 9 +++++++++ kojihub/scheduler.py | 14 +++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/kojihub/kojixmlrpc.py b/kojihub/kojixmlrpc.py index f9000ca2..64ff8bca 100644 --- a/kojihub/kojixmlrpc.py +++ b/kojihub/kojixmlrpc.py @@ -503,6 +503,15 @@ def load_config(environ): ['RPMDefaultChecksums', 'string', 'md5 sha256'], ['SessionRenewalTimeout', 'integer', 1440], + + # scheduler options + ['MaxJobs', 'integer', 15], + ['CapacityOvercommit', 'integer', 5], + ['ReadyTimeout', 'integer', 180], + ['AssignTimeout', 'integer', 300], + ['SoftRefusalTimeout', 'integer', 900], + ['HostTimeout', 'integer', 900], + ['RunInterval', 'integer', 60], ] opts = {} for name, dtype, default in cfgmap: diff --git a/kojihub/scheduler.py b/kojihub/scheduler.py index 4ed2ea0e..7718da60 100644 --- a/kojihub/scheduler.py +++ b/kojihub/scheduler.py @@ -191,13 +191,13 @@ class TaskScheduler(object): self.free_tasks = [] # TODO these things need proper config - self.maxjobs = 15 # XXX - self.capacity_overcommit = 5 - self.ready_timeout = 180 - self.assign_timeout = 300 - self.soft_refusal_timeout = 900 - self.host_timeout = 900 - self.run_interval = 60 + self.maxjobs = context.opts['MaxJobs'] + self.capacity_overcommit = context.opts['CapacityOvercommit'] + self.ready_timeout = context.opts['ReadyTimeout'] + self.assign_timeout = context.opts['AssignTimeout'] + self.soft_refusal_timeout = context.opts['SoftRefusalTimeout'] + self.host_timeout = context.opts['HostTimeout'] + self.run_interval = context.opts['RunInterval'] def run(self, force=False): if not db_lock('scheduler', wait=force):