#!/usr/bin/python3 # Koji virtual machine management daemon # Copyright (c) 2010-2014 Red Hat, Inc. # # Koji is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; # version 2.1 of the License. # # This software is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this software; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Authors: # Mike Bonnet from __future__ import absolute_import, division import base64 import fnmatch import hashlib import logging import os import pwd import random import signal import socket import subprocess import sys import threading import time from contextlib import closing from optparse import OptionParser import libvirt import libxml2 import requests import koji from koji.xmlrpcplus import dumps, Fault, loads, xmlrpc_client, xmlrpc_server import koji.util from koji.daemon import SCM, TaskManager # TaskHandlers are required to be imported, do not remove them from koji.tasks import ( # noqa: F401 BaseTaskHandler, MultiPlatformTask, RestartTask, # handle restarted vm tasks RestartVerifyTask, # handle restarted vm tasks ServerExit, ServerRestart, WaitrepoTask, # wait for repo regens spawned by kojivmd ) class LibvirtCallable(object): # Wrapper class for libvirt call, used in LibvirtConnection def __init__(self, libvirt_conn, method): self.conn = libvirt_conn self.method = method def __call__(self, *args, **kwargs): max_retries = 3 retry = 0 for retry in range(max_retries): try: return getattr(self.conn._conn, self.method)(*args, **kwargs) except libvirt.libvirtError as ex: if ex.get_error_code() == libvirt.VIR_ERR_INTERNAL_ERROR and retry < max_retries: logging.getLogger("koji.vm").warning( 'Internal libvirt error: %s, retry #%d/%d' % (ex, retry, max_retries)) time.sleep(5) try: self.conn._conn.close() except Exception: pass self.conn._conn = libvirt.open(None) else: raise class LibvirtConnection(object): # Wrapper class for libvirt connection retrying calls failed on libvirt connection # e.g. libvirtd restart. Without it kojivmd stucks in main loop def __init__(self): self._conn = libvirt.open(None) def __getattr__(self, name): if name == '_conn': return self._conn return LibvirtCallable(self, name) # Register libvirt handler def libvirt_callback(ignore, err): if err[3] != libvirt.VIR_ERR_ERROR: # Don't log libvirt errors: global error handler will do that logging.warning("Non-error from libvirt: '%s'", err[2]) libvirt.registerErrorHandler(f=libvirt_callback, ctx=None) def get_options(): """process options from command line and config file""" # parse command line args parser = OptionParser() parser.add_option("-c", "--config", dest="configFile", help="use alternate configuration file", metavar="FILE", default="/etc/kojivmd/kojivmd.conf") parser.add_option("--user", help="specify user") parser.add_option("--password", help="specify password") parser.add_option("-f", "--fg", dest="daemon", action="store_false", default=True, help="run in foreground") parser.add_option("--force-lock", action="store_true", default=False, help="force lock for exclusive session") parser.add_option("-v", "--verbose", action="store_true", default=False, help="show verbose output") parser.add_option("-d", "--debug", action="store_true", default=False, help="show debug output") parser.add_option("--debug-task", action="store_true", default=False, help="enable debug output for tasks") parser.add_option("--debug-xmlrpc", action="store_true", default=False, help="show xmlrpc debug output") parser.add_option("--skip-main", action="store_true", default=False, help="don't actually run main") parser.add_option("--maxjobs", type='int', help="Specify maxjobs") parser.add_option("--sleeptime", type='int', help="Specify the polling interval") parser.add_option("--admin-emails", type='str', action="store", metavar="EMAILS", help="Comma-separated addresses to send error notices to.") parser.add_option("--workdir", help="Specify workdir") parser.add_option("--pluginpath", help="Specify plugin search path") parser.add_option("--plugin", action="append", help="Load specified plugin") parser.add_option("-s", "--server", help="url of XMLRPC server") (options, args) = parser.parse_args() if args: parser.error("incorrect number of arguments") # not reached assert False # pragma: no cover # load local config config = koji.read_config_files(options.configFile) for x in config.sections(): if x != 'kojivmd': quit('invalid section found in config file: %s' % x) defaults = {'sleeptime': 15, 'maxjobs': 5, 'minspace': 8192, 'minmem': 4096, 'vmuser': 'qemu', 'admin_emails': None, 'workdir': '/tmp/koji', 'topurl': '', 'imagedir': '/var/lib/libvirt/images', 'pluginpath': '/usr/lib/koji-vm-plugins', 'privaddr': '192.168.122.1', 'portbase': 7000, 'smtphost': 'example.com', 'from_addr': 'Koji Build System ', 'krb_principal': None, 'host_principal_format': 'compile/%s@EXAMPLE.COM', 'keytab': '/etc/kojivmd/kojivmd.keytab', 'ccache': '/var/tmp/kojivmd.ccache', 'server': None, 'user': None, 'password': None, 'no_ssl_verify': False, 'retry_interval': 60, 'max_retries': 120, 'offline_retry': True, 'offline_retry_interval': 120, 'allowed_scms': '', 'allowed_scms_use_config': True, 'allowed_scms_use_policy': False, 'cert': None, 'serverca': None} if config.has_section('kojivmd'): for name, value in config.items('kojivmd'): if name in ['sleeptime', 'maxjobs', 'minspace', 'minmem', 'retry_interval', 'max_retries', 'offline_retry_interval', 'portbase']: try: defaults[name] = int(value) except ValueError: quit("value for %s option must be a valid integer" % name) elif name in ['offline_retry', 'no_ssl_verify', 'allowed_scms_use_config', 'allowed_scms_use_policy']: defaults[name] = config.getboolean('kojivmd', name) elif name in ['plugin', 'plugins']: defaults['plugin'] = value.split() elif name in defaults.keys(): defaults[name] = value else: quit("unknown config option: %s" % name) for name, value in defaults.items(): if getattr(options, name, None) is None: setattr(options, name, value) # special handling for cert defaults cert_defaults = { 'cert': '/etc/kojivmd/client.crt', 'serverca': '/etc/kojivmd/serverca.crt', } for name in cert_defaults: if getattr(options, name, None) is None: fn = cert_defaults[name] if os.path.exists(fn): setattr(options, name, fn) # make sure workdir exists if not os.path.exists(options.workdir): koji.ensuredir(options.workdir) if not options.server: parser.error("--server argument required") return options def quit(msg=None, code=1): if msg: logging.getLogger("koji.vm").error(msg) sys.stderr.write('%s\n' % msg) sys.stderr.flush() sys.exit(code) def main(options, session): logger = logging.getLogger("koji.vm") logger.info('Starting up') tm = VMTaskManager(options, session) tm.findHandlers(globals()) if options.plugin: # load plugins pt = koji.plugin.PluginTracker(path=options.pluginpath.split(':')) for name in options.plugin: logger.info('Loading plugin: %s', name) tm.scanPlugin(pt.load(name)) def shutdown(*args): raise SystemExit def restart(*args): logger.warning("Initiating graceful restart") tm.restart_pending = True signal.signal(signal.SIGTERM, shutdown) signal.signal(signal.SIGUSR1, restart) taken = False tm.cleanupAllVMs() while True: try: tm.updateBuildroots(nolocal=True) tm.updateTasks() taken = tm.getNextTask() tm.cleanupExpiredVMs() except (SystemExit, ServerExit, KeyboardInterrupt): logger.warning("Exiting") break except ServerRestart: logger.warning("Restarting") os.execv(sys.argv[0], sys.argv) except koji.AuthExpired: logger.error('Session expired') break except koji.RetryError: raise except Exception: # XXX - this is a little extreme # log the exception and continue logger.error('Error in main loop', exc_info=True) try: if not taken: # Only sleep if we didn't take a task, otherwise retry immediately. # The load-balancing code in getNextTask() will prevent a single builder # from getting overloaded. time.sleep(options.sleeptime) except (SystemExit, KeyboardInterrupt): logger.warning("Exiting") break logger.warning("Shutting down, please wait...") tm.shutdown() session.logout() sys.exit(0) #################### # Tasks for handling VM lifecycle #################### class DaemonXMLRPCServer(xmlrpc_server.SimpleXMLRPCServer): allow_reuse_address = True def __init__(self, addr, port): if sys.version_info[:2] <= (2, 4): xmlrpc_server.SimpleXMLRPCServer.__init__(self, (addr, port), logRequests=False) else: xmlrpc_server.SimpleXMLRPCServer.__init__(self, (addr, port), logRequests=False, allow_none=True) self.logger = logging.getLogger('koji.vm.DaemonXMLRPCServer') self.socket.settimeout(5) self.active = True def server_close(self): self.active = False xmlrpc_server.SimpleXMLRPCServer.server_close(self) def handle_while_active(self): while self.active: try: conn, (ipaddr, port) = self.get_request() self.logger.debug('request from %s:%s', ipaddr, port) if self.verify_request(conn, (ipaddr, port)): try: self.process_request(conn, (ipaddr, port)) finally: self.close_request(conn) except socket.timeout: pass except Exception: self.logger.error('Error handling requests', exc_info=True) if sys.version_info[:2] <= (2, 4): # Copy and paste from SimpleXMLRPCServer, with the addition of passing # allow_none=True to xmlrpclib.dumps() def _marshaled_dispatch(self, data, dispatch_method=None): params, method = loads(data) try: if dispatch_method is not None: response = dispatch_method(method, params) else: response = self._dispatch(method, params) response = (response,) response = dumps(response, methodresponse=1, allow_none=True) except Fault as fault: response = dumps(fault) except Exception: # report exception back to server response = dumps( Fault(1, "%s:%s" % (sys.exc_info()[0], sys.exc_info()[1])) ) return response class WinBuildTask(MultiPlatformTask): """ Spawns a vmExec task to run a build, and imports the output. """ Methods = ['winbuild'] _taskWeight = 0.2 def handler(self, name, source_url, target, opts=None): if not opts: opts = {} subopts = koji.util.dslice(opts, ['winspec', 'patches'], strict=False) # winspec and patches options are urls # verify the urls before passing them to the VM for url in [source_url] + koji.util.to_list(subopts.values()): scm = SCM(url) scm.assert_allowed(allowed=self.options.allowed_scms, session=self.session, by_config=self.options.allowed_scms_use_config, by_policy=self.options.allowed_scms_use_policy, policy_data={ 'user_id': self.taskinfo['owner'], 'channel': self.session.getChannel(self.taskinfo['channel_id'], strict=True)['name'], 'scratch': opts.get('scratch') }) task_info = self.session.getTaskInfo(self.id) target_info = self.session.getBuildTarget(target) if not target_info: raise koji.BuildError('unknown build target: %s' % target) dest_tag = self.session.getTag(target_info['dest_tag'], strict=True) build_tag = self.session.getTag(target_info['build_tag'], strict=True) repo_id = opts.get('repo_id') if repo_id: repo_info = session.repoInfo(repo_id) event_id = repo_info['create_event'] if not repo_info: raise koji.BuildError('invalid repo ID: %s' % repo_id) policy_data = { 'user_id': task_info['owner'], 'source': source_url, 'task_id': self.id, 'build_tag': build_tag['id'], 'skip_tag': bool(opts.get('skip_tag')), 'target': target_info['id'] } if not opts.get('skip_tag'): policy_data['tag'] = dest_tag['id'] self.session.host.assertPolicy('build_from_repo_id', policy_data) else: repo_info = self.getRepo(build_tag['id']) repo_id = repo_info['id'] event_id = None subopts['repo_id'] = repo_id task_opts = koji.util.dslice(opts, ['timeout', 'cpus', 'mem', 'static_mac'], strict=False) task_id = self.session.host.subtask(method='vmExec', arglist=[name, [source_url, build_tag['name'], subopts], task_opts], label=name[:255], parent=self.id) results = self.wait(task_id)[task_id] results['task_id'] = task_id build_info = None if not opts.get('scratch'): build_info = koji.util.dslice(results, ['name', 'version', 'release', 'epoch']) build_info['package_name'] = build_info['name'] pkg_cfg = self.session.getPackageConfig(dest_tag['id'], build_info['name'], event=event_id) if not opts.get('skip_tag'): # Make sure package is on the list for this tag if pkg_cfg is None: raise koji.BuildError("package %s not in list for tag %s" % (build_info['name'], dest_tag['name'])) elif pkg_cfg['blocked']: raise koji.BuildError("package %s is blocked for tag %s" % (build_info['name'], dest_tag['name'])) build_info = self.session.host.initWinBuild(self.id, build_info, koji.util.dslice(results, ['platform'])) build_id = build_info['id'] try: rpm_results = None spec_url = opts.get('specfile') if spec_url: rpm_results = self.buildWrapperRPM(spec_url, task_id, target_info, build_info, repo_id, channel='default') if opts.get('scratch'): self.session.host.moveWinBuildToScratch(self.id, results, rpm_results) else: self.session.host.completeWinBuild(self.id, build_id, results, rpm_results) except (SystemExit, ServerExit, KeyboardInterrupt): # we do not trap these raise except Exception: if not opts.get('scratch'): # scratch builds do not get imported self.session.host.failBuild(self.id, build_id) # reraise the exception raise if not opts.get('scratch') and not opts.get('skip_tag'): tag_task_id = self.session.host.subtask(method='tagBuild', arglist=[dest_tag['id'], build_id], label='tag', channel='default', parent=self.id) self.wait(tag_task_id) class VMExecTask(BaseTaskHandler): """ Handles the startup, state-tracking, and shutdown of a VM for the purposes for executing a single task. """ Methods = ['vmExec'] _taskWeight = 3.0 CLONE_PREFIX = 'koji-clone-' QCOW2_EXT = '.qcow2' def __init__(self, *args, **kw): super(VMExecTask, self).__init__(*args, **kw) self.task_manager = xmlrpc_client.ServerProxy( 'http://%s:%s/' % (self.options.privaddr, self.options.portbase), allow_none=True) self.port = None self.server = None self.task_info = None self.buildreq_dir = os.path.join(self.workdir, 'buildreqs') koji.ensuredir(self.buildreq_dir) self.output_dir = os.path.join(self.workdir, 'output') koji.ensuredir(self.output_dir) self.output = None self.success = None def mkqcow2(self, clone_name, source_disk, disk_num): new_name = clone_name + '-disk-' + str(disk_num) + self.QCOW2_EXT new_path = os.path.join(self.options.imagedir, new_name) cmd = ['/usr/bin/qemu-img', 'create', '-f', 'qcow2', '-o', 'backing_file=%s' % source_disk, '-F', 'qcow2', new_path] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output, dummy = proc.communicate() ret = proc.wait() if ret: raise koji.BuildError( 'unable to create qcow2 image, "%s" returned %s; output was: %s' % (' '.join(cmd), ret, output)) vm_user = pwd.getpwnam(self.options.vmuser) os.chown(new_path, vm_user.pw_uid, vm_user.pw_gid) return new_path def updateXML(self, xml, opts): """Update the VM xml to reflect the task options""" doc = libxml2.parseDoc(xml) ctx = doc.xpathNewContext() if opts.get('cpus'): cpus = opts['cpus'] cpu_node = ctx.xpathEval('/domain/vcpu')[0] if str(cpus) != cpu_node.getContent(): cpu_node.setContent(str(cpus)) if opts.get('mem'): mem = opts['mem'] # mem is in mbytes, libvirt expects kbytes mem = mem * 1024 mem_node = ctx.xpathEval('/domain/memory')[0] if mem > int(mem_node.getContent()): mem_node.setContent(str(mem)) curr_mem_node = ctx.xpathEval('/domain/currentMemory')[0] if str(mem) != curr_mem_node.getContent(): curr_mem_node.setContent(str(mem)) if opts.get('static_mac'): mac = opts['static_mac'] mac_node = ctx.xpathEval('/domain/devices/interface/mac')[0] mac_node.setProp('address', mac) fixed_xml = str(doc) ctx.xpathFreeContext() doc.freeDoc() return fixed_xml def clone(self, conn, name, opts): """ Clone the VM named "name" and return the name of the cloned VM. All disks will be qcow2 images backed by the storage of the original VM. The original VM must be shutdown, or this will raise an error. """ clone_name = self.CLONE_PREFIX + str(self.id) + '-' + name clone_name = clone_name[:50] orig_vm = conn.lookupByName(name) orig_paths = self.guestDisks(orig_vm) cmd = ["virt-clone", "--original", name, "--name", clone_name, "--preserve-data"] for idx, orig_disk in enumerate(orig_paths): new_disk = self.mkqcow2(clone_name, orig_disk, idx) cmd += ["--file", new_disk] if opts.get('static_mac'): orig_mac = self.macAddr(orig_vm) # doesn't work anymore, needs to place mac address inside the xml # cmd += ["--mac", orig_mac] opts['static_mac'] = orig_mac proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, close_fds=True) output, dummy = proc.communicate() ret = proc.wait() if ret: raise koji.BuildError('unable to clone VM: ' '"%s" returned %s; output was: %s' % (' '.join(cmd), ret, output)) # Set the cpus and mem parameters directly in the XML. newvm = conn.lookupByName(clone_name) clone_xml = self.updateXML(newvm.XMLDesc(0), opts) conn.defineXML(clone_xml) return clone_name def guestDisks(self, vm): """ Parse cloneable disks out of the original VM XML """ doc = libxml2.parseDoc(vm.XMLDesc(0)) ctx = doc.xpathNewContext() ret = [] nodelist = ctx.xpathEval('/domain/devices/disk[@device="disk" and @type="file"]/source') for node in nodelist: for prop in node.properties: if prop.name in ["file", "dev"]: ret.append(node.prop(prop.name)) break ctx.xpathFreeContext() doc.freeDoc() return ret def macAddr(self, vm): """ Return the MAC address of the first network interface configured for the given VM. """ doc = libxml2.parseDoc(vm.XMLDesc(0)) ctx = doc.xpathNewContext() nodelist = ctx.xpathEval('/domain/devices/interface[@type="network"]/mac') if not nodelist: raise koji.BuildError('no network interfaces configured for %s' % vm.name()) addr = nodelist[0].prop('address') ctx.xpathFreeContext() doc.freeDoc() return addr def getTaskInfo(self): """ Get the command-line to run in the VM. """ return self.task_info def initBuildroot(self, repo_id, platform): """ Create the buildroot object on the hub. """ # we're using platform as the arch, which is currently limited to # 16 characters by the database schema buildroot_id = self.session.host.newBuildRoot(repo_id, platform[:16], task_id=self.id) # a VM doesn't require any additional initialization, so move it from INIT to BUILDING self.session.host.setBuildRootState(buildroot_id, 'BUILDING', task_id=self.id) return buildroot_id def updateBuildrootFiles(self, buildroot_id, files, rpms): """ Update the list of files that were downloaded into the build environment. """ if files: self.session.host.updateBuildrootArchives(buildroot_id, self.id, files, project=True) if rpms: self.session.host.updateBuildRootList(buildroot_id, rpms, task_id=self.id) def expireBuildroot(self, buildroot_id): """ Set the buildroot to the expired state. """ return self.session.host.setBuildRootState(buildroot_id, 'EXPIRED', task_id=self.id) def getLatestBuild(self, tag, package, repo_id): """ Get information about the latest build of package "package" in tag "tag". """ repo_info = self.session.repoInfo(repo_id, strict=True) builds = self.session.getLatestBuilds(tag, package=package, event=repo_info['create_event']) if not builds: raise koji.BuildError('no build of package %s in tag %s' % (package, tag)) build = builds[0] maven_build = self.session.getMavenBuild(build['id']) if maven_build: del maven_build['build_id'] build.update(maven_build) win_build = self.session.getWinBuild(build['id']) if win_build: del win_build['build_id'] build.update(win_build) return build def getFileList(self, buildID, type, typeopts): """ Get the list of files of "type" for the latest build of the package "package" in tag "tag". typeopts is a dict that is used to filter the file list. typeopts is checked for: patterns: comma-separated list of path/filename patterns (as used by fnmatch) to filter the results with If type is 'rpm', typeopts is checked for: arches: comma-separated list of arches to include in output If type is 'maven', typeopts is checked for: group_ids: Maven group IDs to include in the output artifact_ids: Maven artifact IDs to include in the output versions: Maven versions to include in the output If type is 'win', typeopts is checked for: platforms: comma-separated list of platforms flags: comma-separated list of flags """ if not typeopts: typeopts = {} if type == 'rpm': arches = None if typeopts.get('arches'): arches = typeopts['arches'].split(',') files = self.session.listRPMs(buildID=buildID, arches=arches) else: files = self.session.listArchives(buildID=buildID, type=type) for fileinfo in files: if type == 'rpm': filepath = koji.pathinfo.rpm(fileinfo) elif type == 'maven': filepath = koji.pathinfo.mavenfile(fileinfo) elif type == 'win': filepath = koji.pathinfo.winfile(fileinfo) else: # XXX support other file types when available filepath = fileinfo['filename'] fileinfo['localpath'] = filepath if typeopts.get('patterns'): to_filter = files files = [] patterns = typeopts['patterns'].split(',') for fileinfo in to_filter: for pattern in patterns: if fnmatch.fnmatch(fileinfo['localpath'], pattern): files.append(fileinfo) break if type == 'maven': if typeopts.get('group_ids'): group_ids = typeopts['group_ids'].split(',') files = [f for f in files if f['group_id'] in group_ids] if typeopts.get('artifact_ids'): artifact_ids = typeopts['artifact_ids'].split(',') files = [f for f in files if f['artifact_id'] in artifact_ids] if typeopts.get('versions'): versions = typeopts['versions'].split(',') files = [f for f in files if f['version'] in versions] if type == 'win': if typeopts.get('platforms'): platforms = typeopts['platforms'].split(',') files = [f for f in files if set(f['platforms'].split()).intersection(platforms)] if typeopts.get('flags'): flags = typeopts['flags'].split(',') files = [f for f in files if set(f['flags'].split()).intersection(flags)] return files def localCache(self, buildinfo, fileinfo, type): """ Access a file in the local cache. If the file does not exist, it's downloaded from the server. Returns an open file object. """ # fileinfo['localpath'] is set by getFileList() localpath = os.path.join(self.buildreq_dir, buildinfo['name'], type, fileinfo['localpath']) if not os.path.isfile(localpath): remote_pi = koji.PathInfo(self.options.topurl) if type == 'rpm': remote_url = remote_pi.build(buildinfo) + '/' + \ fileinfo['localpath'] elif type == 'maven': remote_url = remote_pi.mavenbuild(buildinfo) + '/' + \ fileinfo['localpath'] elif type == 'win': remote_url = remote_pi.winbuild(buildinfo) + '/' + \ fileinfo['localpath'] else: raise koji.BuildError('unsupported file type: %s' % type) koji.ensuredir(os.path.dirname(localpath)) # closing needs to be used for requests < 2.18.0 # nosec - skipping missing timeout, it would be done on VM lifecycle level with closing(requests.get(remote_url, stream=True)) as response: # nosec response.raise_for_status() with open(localpath, 'wb') as f: for chunk in response.iter_content(chunk_size=65536): f.write(chunk) if type == 'rpm': # rpm, check sigmd5. It is enough, as if content is broken, # rpm will fail later hdr = koji.get_rpm_header(localpath) payloadhash = koji.hex_string(koji.get_header_field(hdr, 'sigmd5')) if fileinfo['payloadhash'] != payloadhash: raise koji.BuildError( "Downloaded rpm %s doesn't match checksum (expected: %s, got %s)" % (os.path.basename(fileinfo['localpath']), fileinfo['payloadhash'], payloadhash)) if not koji.util.check_sigmd5(localpath): raise koji.BuildError("Downloaded rpm %s doesn't match sigmd5" % os.path.basename(fileinfo['localpath'])) else: self.verifyChecksum(localpath, fileinfo['checksum'], koji.CHECKSUM_TYPES[fileinfo['checksum_type']]) return open(localpath, 'rb') def getFile(self, buildinfo, archiveinfo, offset, length, type): """ Get the contents of the file indicated by fileinfo, returning a maximum of "length" bytes starting at "offset". Contents are returned base64-encoded. """ offset = int(offset) length = int(length) fileobj = self.localCache(buildinfo, archiveinfo, type) try: fileobj.seek(offset) data = fileobj.read(length) encoded = base64.b64encode(data).decode() del data return encoded finally: fileobj.close() def upload(self, path, offset, contents): local_path = os.path.abspath(os.path.join(self.output_dir, path)) if not local_path.startswith(self.output_dir): raise koji.BuildError('invalid upload path: %s' % path) koji.ensuredir(os.path.dirname(local_path)) # accept offset as a str to avoid problems with files larger than 2**32 offset = int(offset) if offset == 0: if os.path.exists(local_path): raise koji.BuildError('cannot overwrite %s' % local_path) fobj = open(local_path, 'wb') else: if not os.path.isfile(local_path): raise koji.BuildError('% does not exist' % local_path) size = os.path.getsize(local_path) if offset != size: raise koji.BuildError('cannot write to %s at offset %s, size is %s' % (local_path, offset, size)) fobj = open(local_path, 'rb+') fobj.seek(offset) data = base64.b64decode(contents) fobj.write(data) fobj.close() return len(data) def uploadDirect(self, filepath, offset, size, checksum, data): """ Upload contents directly to the server. """ remotepath = os.path.dirname(os.path.join(self.getUploadDir(), filepath)) filename = os.path.basename(filepath) self.session.uploadFile(remotepath, filename, size, checksum, offset, data) def verifyChecksum(self, path, checksum, algo='sha1'): local_path = os.path.abspath(os.path.join(self.output_dir, path)) if not local_path.startswith(self.output_dir) and \ not local_path.startswith(self.buildreq_dir): raise koji.BuildError('invalid path: %s' % path) if not os.path.isfile(local_path): raise koji.BuildError('%s does not exist' % local_path) if algo == 'sha1': sum = hashlib.sha1() # nosec elif algo == 'md5': sum = koji.util.md5_constructor() elif algo == 'sha256': sum = hashlib.sha256() else: raise koji.BuildError('unsupported checksum algorithm: %s' % algo) with open(local_path, 'rb') as f: while True: data = f.read(1048576) if not data: break sum.update(data) if sum.hexdigest() == checksum: return True else: raise koji.BuildError( '%s checksum validation failed for %s, %s (computed) != %s (provided)' % (algo, local_path, sum.hexdigest(), checksum)) def closeTask(self, output): self.output = output self.success = True return True def failTask(self, output): self.output = output self.success = False return True def setupTaskServer(self): """ Setup the task-specific xmlrpc server to listen to requests from the VM. """ self.server = DaemonXMLRPCServer(self.options.privaddr, self.port) self.server.register_function(self.getTaskInfo) self.server.register_function(self.closeTask) self.server.register_function(self.failTask) self.server.register_function(self.initBuildroot) self.server.register_function(self.updateBuildrootFiles) self.server.register_function(self.expireBuildroot) self.server.register_function(self.getLatestBuild) self.server.register_function(self.getFileList) self.server.register_function(self.getFile) self.server.register_function(self.upload) self.server.register_function(self.uploadDirect) self.server.register_function(self.verifyChecksum) thr = threading.Thread(name='task_%s_thread' % self.id, target=self.server.handle_while_active) thr.daemon = True thr.start() def handler(self, name, task_info, opts=None): """ Clone the VM named "name", and provide the data in "task_info" to it. Available options: - timeout (int): number of minutes to let the VM run before destroying it and failing the task, default: 1440 """ if not opts: opts = {} timeout = opts.get('timeout', 1440) self.task_info = task_info conn = libvirt.open(None) clone_name = self.clone(conn, name, opts) self.logger.debug('Cloned VM %s to %s', name, clone_name) try: vm = conn.lookupByName(clone_name) macaddr = self.macAddr(vm) registered = False while not registered: # loop in case the port is already taken self.port = self.options.portbase + random.randint(1, 100) registered = self.task_manager.registerVM(macaddr, clone_name, self.id, self.port) self.setupTaskServer() vm.create() self.logger.info('Started VM %s', clone_name) except libvirt.libvirtError as e: self.logger.error('error starting VM %s', clone_name, exc_info=True) raise koji.PreBuildError('error starting VM %s, error was: %s' % (clone_name, e)) start = time.time() while True: time.sleep(15) info = vm.info() if info[0] in (libvirt.VIR_DOMAIN_CRASHED, libvirt.VIR_DOMAIN_SHUTOFF): self.logger.warning('VM %s crashed', clone_name) self.server.server_close() raise koji.BuildError('VM %s crashed' % clone_name) if self.success is None: # task is still running # make sure it hasn't exceeded the timeout mins = (time.time() - start) // 60 if mins > timeout: vm.destroy() self.server.server_close() raise koji.BuildError( 'Task did not complete after %.2f minutes, VM %s has been destroyed' % (mins, clone_name)) else: vm.destroy() self.server.server_close() self.uploadTree(self.output_dir) if self.success: return self.output else: raise koji.BuildError(self.output) class VMTaskManager(TaskManager): def __init__(self, options, session): super(VMTaskManager, self).__init__(options, session) self.libvirt_conn = LibvirtConnection() self.macaddrs = {} self.macaddr_lock = threading.Lock() self.expired_vms = {} self.setupServer() def registerVM(self, macaddr, vm_name, task_id, port): """ Register a VM instance with the task manager. """ self.macaddr_lock.acquire() try: macaddr = macaddr.lower() ports = [d[2] for d in self.macaddrs.values()] if port in ports: return False if macaddr in self.macaddrs: raise koji.PreBuildError('duplicate MAC address: %s' % macaddr) self.macaddrs[macaddr] = (vm_name, task_id, port) self.logger.info( 'registered MAC address %s for VM %s (task ID %s, port %s)', macaddr, vm_name, task_id, port) return True finally: self.macaddr_lock.release() def getPort(self, macaddr): """ Get the port that the daemon associated with VM with the given MAC address is listening on. """ self.macaddr_lock.acquire() try: macaddr = macaddr.lower() data = self.macaddrs.get(macaddr) if data: return data[2] else: raise koji.PreBuildError('unknown MAC address: %s' % macaddr) finally: self.macaddr_lock.release() def setupServer(self): self.server = DaemonXMLRPCServer(self.options.privaddr, self.options.portbase) self.server.register_function(self.registerVM) self.server.register_function(self.getPort) thr = threading.Thread(name='manager_thread', target=self.server.handle_while_active) thr.daemon = True thr.start() def getCloneDisks(self, vm): doc = libxml2.parseDoc(vm.XMLDesc(0)) ctx = doc.xpathNewContext() nodelist = ctx.xpathEval('/domain/devices/disk[@device="disk" and @type="file"]/source') disks = [] for node in nodelist: disk = node.prop('file') if os.path.basename(disk).startswith(VMExecTask.CLONE_PREFIX) and \ disk.endswith(VMExecTask.QCOW2_EXT): disks.append(disk) ctx.xpathFreeContext() doc.freeDoc() return disks def checkDisk(self): if not os.path.exists(self.options.imagedir): self.logger.error('No such directory: %s', self.options.imagedir) raise IOError('No such directory: %s' % self.options.imagedir) fs_stat = os.statvfs(self.options.imagedir) available = fs_stat.f_bavail * fs_stat.f_bsize availableMB = available // 1024 // 1024 self.logger.debug('disk space available in %s: %i MB', self.options.imagedir, availableMB) if availableMB < self.options.minspace: self.status = 'Insufficient disk space: %i MB, %i MB required' % \ (availableMB, self.options.minspace) self.logger.warning(self.status) return False return True def checkMem(self): phys_mem = os.sysconf('SC_PHYS_PAGES') * os.sysconf('SC_PAGE_SIZE') // 1024 vm_mem = 0 for vm_id in self.libvirt_conn.listDomainsID(): vm = self.libvirt_conn.lookupByID(vm_id) info = vm.info() # info[1] is the max. memory allocatable to the VM, and info[2] is the amount of # memory currently used by the VM (in kbytes). We're interested in the latter. vm_mem += info[2] avail_mem = phys_mem - vm_mem # options.minmem is listed in mbytes min_mem = self.options.minmem * 1024 self.logger.debug('physical mem: %sk, allocated mem: %sk, available mem: %sk', phys_mem, vm_mem, avail_mem) if avail_mem < min_mem: self.status = 'Insufficient memory: %sk allocated, %sk available, %sk required' % \ (vm_mem, avail_mem, min_mem) self.logger.warning(self.status) return False return True def checkSpace(self): """See if we have enough space to accept another job""" return self.checkDisk() and self.checkMem() def checkRelAvail(self, bin_avail, avail): """ Always return True, since we may be the only daemon with access to the VM required to process this task. """ return True def takeTask(self, task): """ Verify that this builder can handle the task before claiming it. """ if task['method'] == 'vmExec': task_info = self.session.getTaskInfo(task['id'], request=True) vm_name = task_info['request'][0] try: self.libvirt_conn.lookupByName(vm_name) except libvirt.libvirtError as e: if e.get_error_code() == libvirt.VIR_ERR_NO_DOMAIN: # if this builder does not have the requested VM, # we can't handle the task self.logger.debug('VM %s not available, ignoring task %i', vm_name, task['id']) return False else: raise return super(VMTaskManager, self).takeTask(task) def cleanupVM(self, vm_name): """ Cleanup a single VM with the given name. """ try: vm = self.libvirt_conn.lookupByName(vm_name) except libvirt.libvirtError: # if we can't find the VM by name, it has probably been cleaned up manually self.logger.warning("Can't find %s, assuming it has already been cleaned up", vm_name) return True info = vm.info() if info[0] not in (libvirt.VIR_DOMAIN_SHUTOFF, libvirt.VIR_DOMAIN_CRASHED): vm.destroy() self.logger.info('Shut down VM %s', vm_name) disks = self.getCloneDisks(vm) for disk in disks: try: if os.path.isfile(disk): os.unlink(disk) self.logger.debug('Removed disk file %s for VM %s', disk, vm_name) except Exception: self.logger.error('Error removing disk file %s for VM %s', disk, vm_name, exc_info=True) return False else: # Removed all the disks successfully, so undefine the VM vm.undefineFlags(libvirt.VIR_DOMAIN_UNDEFINE_NVRAM) self.logger.info('Cleaned up VM %s', vm_name) return True def cleanupAllVMs(self): """ Cleanup shutdown and clean up all cloned Koji VMs. Only called once at daemon startup, so we start with a clean slate. """ vms = self.libvirt_conn.listDefinedDomains() + self.libvirt_conn.listDomainsID() for vm_name in vms: if isinstance(vm_name, int): vm_name = self.libvirt_conn.lookupByID(vm_name).name() if vm_name.startswith(VMExecTask.CLONE_PREFIX): self.cleanupVM(vm_name) def cleanupExpiredVMs(self): for vm_name, task in list(self.expired_vms.items()): if task['state'] == koji.TASK_STATES['FAILED']: if time.time() - task['completion_ts'] < 3600 * 4: # task failed, so we'll keep the VM image around for 4 hours # for debugging purposes continue ret = self.cleanupVM(vm_name) if ret: # successfully cleaned up the VM, so remove it from the expired list del self.expired_vms[vm_name] def cleanupTask(self, task_id, wait=True): ret = super(VMTaskManager, self).cleanupTask(task_id, wait) self.macaddr_lock.acquire() try: if ret: for macaddr, (vm_name, id, port) in list(self.macaddrs.items()): if task_id == id: self.expired_vms[vm_name] = self.session.getTaskInfo(task_id) del self.macaddrs[macaddr] self.logger.info('unregistered MAC address %s', macaddr) break return ret finally: self.macaddr_lock.release() def shutdown(self): self.server.server_close() self.libvirt_conn.close() super(VMTaskManager, self).shutdown() #################### # Boilerplate startup code #################### if __name__ == "__main__": koji.add_file_logger("koji", "/var/log/kojivmd.log") # note we're setting logging params for all of koji* options = get_options() if options.debug: logging.getLogger("koji").setLevel(logging.DEBUG) elif options.verbose: logging.getLogger("koji").setLevel(logging.INFO) else: logging.getLogger("koji").setLevel(logging.WARN) if options.debug_task: logging.getLogger("koji.build.BaseTaskHandler").setLevel(logging.DEBUG) if options.admin_emails: koji.add_mail_logger("koji", options.admin_emails) # start a session and login session_opts = koji.grab_session_options(options) session = koji.ClientSession(options.server, session_opts) if options.cert and os.path.isfile(options.cert): try: # authenticate using SSL client certificates session.ssl_login(options.cert, None, options.serverca) except koji.AuthError as e: quit("Error: Unable to log in: %s" % e) except requests.exceptions.ConnectionError: quit("Error: Unable to connect to server %s" % (options.server)) elif options.user: try: # authenticate using user/password session.login() except koji.AuthError: quit("Error: Unable to log in. Bad credentials?") except requests.exceptions.ConnectionError: quit("Error: Unable to connect to server %s" % (options.server)) else: krb_principal = options.krb_principal if krb_principal is None: krb_principal = options.host_principal_format % socket.getfqdn() session.gssapi_login(principal=krb_principal, keytab=options.keytab, ccache=options.ccache) # make session exclusive try: session.exclusiveSession(force=options.force_lock) except koji.AuthLockError: quit("Error: Unable to get lock. Trying using --force-lock") if not session.logged_in: quit("Error: Unknown login error") # make sure it works try: ret = session.echo("OK") except requests.exceptions.ConnectionError: quit("Error: Unable to connect to server %s" % (options.server)) if ret != ["OK"]: quit("Error: incorrect server response: %r" % (ret)) # run main if options.daemon: # detach koji.daemonize() main(options, session) elif not options.skip_main: koji.add_stderr_logger("koji") main(options, session)