make ClientSession retries more configurable, and more robust

add an offline mode to the hub (ServerOffline fault)
report offline status if db connection fails
adjust retry timings for kojid and kojira
This commit is contained in:
Mike McLean 2008-02-22 11:32:27 -05:00
parent 58b7313d0a
commit 54f79ff665
4 changed files with 71 additions and 12 deletions

View file

@ -2565,6 +2565,10 @@ def get_options():
'server': None,
'user': None,
'password': None,
'retry_interval': 60,
'max_retries': 120,
'offline_retry': True,
'offline_retry_interval': 120,
'pkgurl': None,
'allowed_scms': '',
'cert': '/etc/kojid/client.crt',
@ -2572,7 +2576,8 @@ def get_options():
'serverca': '/etc/kojid/serverca.crt'}
if config.has_section('kojid'):
for name, value in config.items('kojid'):
if name in ['sleeptime', 'maxjobs', 'minspace']:
if name in ['sleeptime', 'maxjobs', 'minspace', 'retry_interval',
'max_retries', 'offline_retry_interval']:
try:
defaults[name] = int(value)
except ValueError:
@ -2621,8 +2626,11 @@ if __name__ == "__main__":
#build session options
session_opts = {}
for k in ('user','password','debug_xmlrpc', 'debug'):
session_opts[k] = getattr(options,k)
for k in ('user','password','debug_xmlrpc', 'debug',
'retry_interval', 'max_retries', 'offline_retry', 'offline_retry_interval'):
v = getattr(options, k, None)
if v is not None:
session_opts[k] = v
#start a session and login
session = koji.ClientSession(options.server, session_opts)
if os.path.isfile(options.cert):

View file

@ -246,6 +246,18 @@ class ModXMLRPCRequestHandler(object):
req.write(response)
def offline_reply(req, msg=None):
    """Answer the request with a ServerOffline XML-RPC fault.

    req -- the request object; its content type and content length are
           set and the marshalled fault is written to it
    msg -- optional fault string; when None, a generic
           "server is offline" message is used instead
    """
    # fall back to the generic message only when no message was supplied
    text = msg
    if text is None:
        text = "server is offline"
    # marshal the fault using the fault code declared on koji.ServerOffline
    fault = Fault(koji.ServerOffline.faultCode, text)
    payload = dumps(fault)
    req.content_type = "text/xml"
    req.set_content_length(len(payload))
    req.write(payload)
#
# mod_python handler
#
@ -267,6 +279,9 @@ def handler(req, profiling=False):
else:
opts = req.get_options()
try:
if opts.get("ServerOffline", False):
offline_reply(req, msg=opts.get("OfflineMessage", None))
return apache.OK
context._threadclear()
context.commit_pending = False
context.opts = opts
@ -274,7 +289,11 @@ def handler(req, profiling=False):
koji.db.provideDBopts(database = opts["DBName"],
user = opts["DBUser"],
host = opts.get("DBhost",None))
context.cnx = koji.db.connect(opts.get("KojiDebug",False))
try:
context.cnx = koji.db.connect(opts.get("KojiDebug",False))
except Exception:
offline_reply(req, msg="database outage")
return apache.OK
functions = RootExports()
hostFunctions = HostExports()
h = ModXMLRPCRequestHandler()

View file

@ -256,6 +256,10 @@ class FunctionDeprecated(GenericError):
"""Raised by a deprecated function"""
faultCode = 1013
class ServerOffline(GenericError):
    """Error signalling that the server is offline.

    Carries fault code 1014.
    """
    faultCode = 1014
#A function to create an exception from a fault
def convertFault(fault):
"""Convert a fault to the corresponding Exception type, if possible"""
@ -1020,7 +1024,7 @@ def openRemoteFile(relpath, topurl=None, topdir=None):
fn = "%s/%s" % (topdir, relpath)
fo = open(fn)
else:
raise koji.GenericError, "No access method for remote file: %s" % relpath
raise GenericError, "No access method for remote file: %s" % relpath
return fo
@ -1306,12 +1310,35 @@ class ClientSession(object):
tries += 1
try:
return proxy.__getattr__(name)(*args)
#basically, we want to retry on most errors, with a few exceptions
# - faults (this means the call completed and failed)
# - SystemExit, KeyboardInterrupt
# note that, for logged-in sessions the server should tell us (via a RetryError fault)
# if the call cannot be retried. For non-logged-in sessions, all calls should be read-only
# and hence retryable.
except Fault, fault:
raise convertFault(fault)
except (socket.error,socket.sslerror,xmlrpclib.ProtocolError,OpenSSL.SSL.Error), e:
err = convertFault(fault)
if isinstance(err, ServerOffline):
if self.opts.get('offline_retry',False):
secs = self.opts.get('offline_retry_interval', interval)
if debug:
self.logger.debug("Server offline. Retrying in %i seconds" % secs)
time.sleep(secs)
#reset try count - this isn't a typical error, this is a running server
#correctly reporting an outage
tries = 0
continue
raise err
except (SystemExit, KeyboardInterrupt):
#(depending on the python version, these may or may not be subclasses of Exception)
raise
except Exception, e:
if not self.logged_in:
raise
elif debug:
#in the past, non-logged-in sessions did not retry. For compatibility purposes
#this behavior is governed by the anon_retry opt.
if not self.opts.get('anon_retry',False):
raise
if debug:
self.logger.debug("Try #%d for call %d (%s) failed: %s" % (tries, self.callnum, name, e))
time.sleep(interval)
raise RetryError, "reached maximum number of retries, last call failed with: %s" % ''.join(traceback.format_exception_only(*sys.exc_info()[:2]))

View file

@ -420,6 +420,10 @@ def get_options():
'logfile': '/var/log/kojira.log',
'principal': None,
'keytab': None,
'retry_interval': 60,
'max_retries': 120,
'offline_retry': True,
'offline_retry_interval': 120,
'prune_batch_size': 4,
'delete_batch_size': 3,
'max_repo_tasks' : 10,
@ -430,9 +434,9 @@ def get_options():
}
if config.has_section(section):
int_opts = ('prune_batch_size', 'deleted_repo_lifetime', 'max_repo_tasks',
'delete_batch_size')
'delete_batch_size', 'retry_interval', 'max_retries', 'offline_retry_interval')
str_opts = ('topdir','server','user','password','logfile', 'principal', 'keytab', 'cert', 'ca', 'serverca')
bool_opts = ('with_src','verbose','debug')
bool_opts = ('with_src','verbose','debug', 'offline_retry')
for name in config.options(section):
if name in int_opts:
defaults[name] = config.getint(section, name)
@ -485,7 +489,8 @@ if __name__ == "__main__":
else:
logger.setLevel(logging.WARNING)
session_opts = {}
for k in ('user', 'password', 'debug_xmlrpc', 'debug'):
for k in ('user', 'password', 'debug_xmlrpc', 'debug',
'retry_interval', 'max_retries', 'offline_retry', 'offline_retry_interval'):
session_opts[k] = getattr(options,k)
session = koji.ClientSession(options.server,session_opts)
if os.path.isfile(options.cert):