From 69d841a63b6be03e7cf5fcc2fc3fc2eccbecadf9 Mon Sep 17 00:00:00 2001
From: Mike McLean
Date: Fri, 22 Feb 2008 11:26:27 -0500
Subject: [PATCH] make ClientSession retries more configurable, and more robust
 add an offline mode to the hub (ServerOffline fault)
 report offline status if db connection fails
 adjust retry timings for kojid and kojira

---
 builder/kojid     | 14 +++++++++++---
 hub/kojixmlrpc.py | 21 ++++++++++++++++++++-
 koji/__init__.py  | 37 ++++++++++++++++++++++++++++++++-----
 util/kojira       | 11 ++++++++---
 4 files changed, 71 insertions(+), 12 deletions(-)

diff --git a/builder/kojid b/builder/kojid
index e80a0c62..c4bab101 100755
--- a/builder/kojid
+++ b/builder/kojid
@@ -2656,6 +2656,10 @@ def get_options():
                 'server': None,
                 'user': None,
                 'password': None,
+                'retry_interval': 60,
+                'max_retries': 120,
+                'offline_retry': True,
+                'offline_retry_interval': 120,
                 'pkgurl': None,
                 'allowed_scms': '',
                 'cert': '/etc/kojid/client.crt',
@@ -2663,7 +2667,8 @@ def get_options():
                 'serverca': '/etc/kojid/serverca.crt'}
     if config.has_section('kojid'):
         for name, value in config.items('kojid'):
-            if name in ['sleeptime', 'maxjobs', 'minspace']:
+            if name in ['sleeptime', 'maxjobs', 'minspace', 'retry_interval',
+                        'max_retries', 'offline_retry_interval']:
                 try:
                     defaults[name] = int(value)
                 except ValueError:
@@ -2712,8 +2717,11 @@ if __name__ == "__main__":
 
     #build session options
     session_opts = {}
-    for k in ('user','password','debug_xmlrpc', 'debug'):
-        session_opts[k] = getattr(options,k)
+    for k in ('user','password','debug_xmlrpc', 'debug',
+              'retry_interval', 'max_retries', 'offline_retry', 'offline_retry_interval'):
+        v = getattr(options, k, None)
+        if v is not None:
+            session_opts[k] = v
     #start a session and login
     session = koji.ClientSession(options.server, session_opts)
     if os.path.isfile(options.cert):
diff --git a/hub/kojixmlrpc.py b/hub/kojixmlrpc.py
index db737829..4aee2ec6 100644
--- a/hub/kojixmlrpc.py
+++ b/hub/kojixmlrpc.py
@@ -246,6 +246,18 @@ class ModXMLRPCRequestHandler(object):
         req.write(response)
 
 
+def offline_reply(req, msg=None):
+    """Send a ServerOffline reply"""
+    faultCode = koji.ServerOffline.faultCode
+    if msg is None:
+        faultString = "server is offline"
+    else:
+        faultString = msg
+    response = dumps(Fault(faultCode, faultString))
+    req.content_type = "text/xml"
+    req.set_content_length(len(response))
+    req.write(response)
+
 #
 # mod_python handler
 #
@@ -267,6 +279,9 @@ def handler(req, profiling=False):
     else:
         opts = req.get_options()
         try:
+            if opts.get("ServerOffline", False):
+                offline_reply(req, msg=opts.get("OfflineMessage", None))
+                return apache.OK
             context._threadclear()
             context.commit_pending = False
             context.opts = opts
@@ -274,7 +289,11 @@ def handler(req, profiling=False):
             koji.db.provideDBopts(database = opts["DBName"],
                                   user = opts["DBUser"],
                                   host = opts.get("DBhost",None))
-            context.cnx = koji.db.connect(opts.get("KojiDebug",False))
+            try:
+                context.cnx = koji.db.connect(opts.get("KojiDebug",False))
+            except Exception:
+                offline_reply(req, msg="database outage")
+                return apache.OK
             functions = RootExports()
             hostFunctions = HostExports()
             h = ModXMLRPCRequestHandler()
diff --git a/koji/__init__.py b/koji/__init__.py
index 3f173de3..bb1a49df 100644
--- a/koji/__init__.py
+++ b/koji/__init__.py
@@ -256,6 +256,10 @@ class FunctionDeprecated(GenericError):
     """Raised by a deprecated function"""
     faultCode = 1013
 
+class ServerOffline(GenericError):
+    """Raised when the server is offline"""
+    faultCode = 1014
+
 #A function to get create an exception from a fault
 def convertFault(fault):
     """Convert a fault to the corresponding Exception type, if possible"""
@@ -1020,7 +1024,7 @@ def openRemoteFile(relpath, topurl=None, topdir=None):
         fn = "%s/%s" % (topdir, relpath)
         fo = open(fn)
     else:
-        raise koji.GenericError, "No access method for remote file: %s" % relpath
+        raise GenericError, "No access method for remote file: %s" % relpath
     return fo
 
 
@@ -1306,12 +1310,35 @@ class ClientSession(object):
             tries += 1
             try:
                 return proxy.__getattr__(name)(*args)
+            #basically, we want to retry on most errors, with a few exceptions
+            # - faults (this means the call completed and failed)
+            # - SystemExit, KeyboardInterrupt
+            # note that, for logged-in sessions the server should tell us (via a RetryError fault)
+            # if the call cannot be retried. For non-logged-in sessions, all calls should be read-only
+            # and hence retryable.
             except Fault, fault:
-                raise convertFault(fault)
-            except (socket.error,socket.sslerror,xmlrpclib.ProtocolError,OpenSSL.SSL.Error), e:
+                err = convertFault(fault)
+                if isinstance(err, ServerOffline):
+                    if self.opts.get('offline_retry',False):
+                        secs = self.opts.get('offline_retry_interval', interval)
+                        if debug:
+                            self.logger.debug("Server offline. Retrying in %i seconds" % secs)
+                        time.sleep(secs)
+                        #reset try count - this isn't a typical error, this is a running server
+                        #correctly reporting an outage
+                        tries = 0
+                        continue
+                raise err
+            except (SystemExit, KeyboardInterrupt):
+                #(depending on the python version, these may or may not be subclasses of Exception)
+                raise
+            except Exception, e:
                 if not self.logged_in:
-                    raise
-                elif debug:
+                    #in the past, non-logged-in sessions did not retry. For compatibility purposes
+                    #this behavior is governed by the anon_retry opt.
+                    if not self.opts.get('anon_retry',False):
+                        raise
+                if debug:
                     self.logger.debug("Try #%d for call %d (%s) failed: %s" % (tries, self.callnum, name, e))
             time.sleep(interval)
         raise RetryError, "reached maximum number of retries, last call failed with: %s" % ''.join(traceback.format_exception_only(*sys.exc_info()[:2]))
diff --git a/util/kojira b/util/kojira
index 2c200892..8105ce0c 100755
--- a/util/kojira
+++ b/util/kojira
@@ -419,6 +419,10 @@ def get_options():
                 'logfile': '/var/log/kojira.log',
                 'principal': None,
                 'keytab': None,
+                'retry_interval': 60,
+                'max_retries': 120,
+                'offline_retry': True,
+                'offline_retry_interval': 120,
                 'prune_batch_size': 4,
                 'delete_batch_size': 3,
                 'max_repo_tasks' : 10,
@@ -429,9 +433,9 @@ def get_options():
                 }
     if config.has_section(section):
         int_opts = ('prune_batch_size', 'deleted_repo_lifetime', 'max_repo_tasks',
-                    'delete_batch_size')
+                    'delete_batch_size', 'retry_interval', 'max_retries', 'offline_retry_interval')
         str_opts = ('topdir','server','user','password','logfile', 'principal', 'keytab', 'cert', 'ca', 'serverca')
-        bool_opts = ('with_src','verbose','debug','ignore_stray_repos')
+        bool_opts = ('with_src','verbose','debug','ignore_stray_repos', 'offline_retry')
         for name in config.options(section):
             if name in int_opts:
                 defaults[name] = config.getint(section, name)
@@ -486,7 +490,8 @@ if __name__ == "__main__":
     else:
         logger.setLevel(logging.WARNING)
     session_opts = {}
-    for k in ('user', 'password', 'debug_xmlrpc', 'debug'):
+    for k in ('user', 'password', 'debug_xmlrpc', 'debug',
+              'retry_interval', 'max_retries', 'offline_retry', 'offline_retry_interval'):
         session_opts[k] = getattr(options,k)
     session = koji.ClientSession(options.server,session_opts)
     if os.path.isfile(options.cert):