debian-koji/vm/kojikamid

#!/usr/bin/python

# Koji daemon that runs in a Windows VM and executes commands associated
# with a task.
# Copyright (c) 2010 Red Hat
#
#    Koji is free software; you can redistribute it and/or
#    modify it under the terms of the GNU Lesser General Public
#    License as published by the Free Software Foundation;
#    version 2.1 of the License.
#
#    This software is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with this software; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
# Authors:
#       Mike Bonnet <mikeb@redhat.com>

# To register this script as a service on Windows 2008 (with Cygwin 1.7.5 installed) run:
#   kojikamid --install
# in a cygwin shell.

import datetime
from optparse import OptionParser
from ConfigParser import ConfigParser
import os
import subprocess
import sys
import tempfile
import time
import urlparse
import xmlrpclib
import base64
import hashlib
import traceback

# Port on the gateway host that get_mgmt_server() first connects to in order
# to look up the task-specific port for this VM's MAC address.
MANAGER_PORT = 7000

##############################
# Begin heinous copy and paste
##############################

class GenericError(Exception):
    """Base class for our custom exceptions

    Class attributes:
     - faultCode: numeric code identifying the error type (1000 here,
       overridden by subclasses)
     - fromFault: False by default
    """
    faultCode = 1000
    fromFault = False

    def __str__(self):
        """
        Return the most specific message available.

        Tries, in order: args[0]['args'][0] (first argument is a mapping that
        carries its own 'args' list), then args[0], and finally the string
        form of the instance __dict__ when no arguments were given.
        """
        # "except Exception" rather than a bare except, so that
        # KeyboardInterrupt/SystemExit are never swallowed while formatting
        try:
            return str(self.args[0]['args'][0])
        except Exception:
            try:
                return str(self.args[0])
            except Exception:
                return str(self.__dict__)

class BuildError(GenericError):
    """Raised when a build fails"""
    # overrides the GenericError default of 1000
    faultCode = 1005

class SCM(object):
    """
    SCM abstraction class

    Parses an SCM url into its components and knows how to build and run the
    checkout commands for the CVS, GIT and SVN families listed in `types`.
    """

    # maps each SCM type to the url schemes that select it
    types = { 'CVS': ('cvs://',),
              'CVS+SSH': ('cvs+ssh://',),
              'GIT': ('git://', 'git+http://', 'git+https://', 'git+rsync://'),
              'GIT+SSH': ('git+ssh://',),
              'SVN': ('svn://', 'svn+http://', 'svn+https://'),
              'SVN+SSH': ('svn+ssh://',) }

    @staticmethod
    def is_scm_url(url):
        """
        Return True if the url starts with one of the schemes listed in
        SCM.types, False otherwise.  Only the scheme prefix is examined.
        """
        for schemes in SCM.types.values():
            # str.startswith accepts a tuple of candidate prefixes
            if url.startswith(schemes):
                return True
        return False

    def __init__(self, url):
        """
        Initialize the SCM object using the specified url.
        The expected url format is:

        scheme://[user@]host/path/to/repo?path/to/module#revision_or_tag_identifier

        The initialized SCM object will have the following attributes:
        - url (the unmodified url)
        - scheme
        - user (may be null)
        - host
        - repository
        - module
        - revision
        - scmtype

        The exact format of each attribute is SCM-specific, but the structure of the url
        must conform to the template above, or a GenericError will be raised.
        """
        if not SCM.is_scm_url(url):
            raise GenericError('Invalid SCM URL: %s' % url)

        self.url = url
        scheme, user, host, path, query, fragment = self._parse_url()

        self.scheme = scheme
        self.user = user
        self.host = host
        self.repository = path
        self.module = query
        self.revision = fragment

        for scmtype, schemes in SCM.types.items():
            if self.scheme in schemes:
                self.scmtype = scmtype
                break
        else:
            # should never happen
            raise GenericError('Invalid SCM URL: %s' % url)

    def _parse_url(self):
        """
        Parse the SCM url into usable components.
        Return the following tuple:

        (scheme, user, host, path, query, fragment)

        user may be None and query may be empty, everything else will have a value.
        Raises GenericError if the url cannot be split into those components or
        if the user part contains a password.
        """
        # get the url's scheme
        scheme = self.url.split('://')[0] + '://'

        # replace the scheme with http:// so that the urlparse works in all cases
        dummyurl = self.url.replace(scheme, 'http://', 1)
        dummyscheme, netloc, path, params, query, fragment = urlparse.urlparse(dummyurl)

        user = None
        userhost = netloc.split('@')
        if len(userhost) == 2:
            user = userhost[0]
            if not user:
                # Don't return an empty string
                user = None
            elif ':' in user:
                raise GenericError('username:password format not supported: %s' % user)
            netloc = userhost[1]
        elif len(userhost) > 2:
            raise GenericError('Invalid username@hostname specified: %s' % netloc)

        # ensure that path and query do not end in /
        if path.endswith('/'):
            path = path[:-1]
        if query.endswith('/'):
            query = query[:-1]

        # check for validity: params should be empty, query may be empty, everything else should be populated
        if params or not (scheme and netloc and path and fragment):
            raise GenericError('Unable to parse SCM URL: %s' % self.url)

        # return parsed values
        return (scheme, user, netloc, path, query, fragment)

    def _require_user(self):
        """Raise BuildError unless the url specified a user (needed by the *+SSH types)."""
        if not self.user:
            raise BuildError('No user specified for repository access scheme: %s' % self.scheme)

    def _transport_scheme(self):
        """Return the scheme with any 'xxx+' prefix removed, e.g. 'git+http://' -> 'http://'."""
        scheme = self.scheme
        if '+' in scheme:
            scheme = scheme.split('+')[1]
        return scheme

    def _git_checkout_name(self):
        """Return the last component of the repository path without a trailing '/.git' or '.git'."""
        repository = self.repository
        if repository.endswith('/.git'):
            repository = repository[:-5]
        elif repository.endswith('.git'):
            repository = repository[:-4]
        return os.path.basename(repository)

    def checkout(self, scmdir, logfile):
        """
        Checkout the module from SCM.  Accepts the following parameters:
         - scmdir: the working directory the checkout command is run in
         - logfile: passed through to run() for every command executed

        Returns the directory that the module was checked-out into (a subdirectory of scmdir)

        Raises BuildError if a *+SSH type is used without a user, if the SCM type
        is unknown, or if one of the commands exits with a non-zero status.
        """
        # TODO: sanity check arguments
        sourcedir = '%s/%s' % (scmdir, self.module)

        update_checkout_cmd = None
        update_checkout_dir = None

        if self.scmtype == 'CVS':
            pserver = ':pserver:%s@%s:%s' % ((self.user or 'anonymous'), self.host, self.repository)
            module_checkout_cmd = ['cvs', '-d', pserver, 'checkout', '-r', self.revision, self.module]

        elif self.scmtype == 'CVS+SSH':
            self._require_user()
            cvsserver = ':ext:%s@%s:%s' % (self.user, self.host, self.repository)
            module_checkout_cmd = ['cvs', '-d', cvsserver, 'checkout', '-r', self.revision, self.module]

        elif self.scmtype in ('GIT', 'GIT+SSH'):
            # the two git flavours differ only in how the repository url is built
            if self.scmtype == 'GIT':
                gitrepo = '%s%s%s' % (self._transport_scheme(), self.host, self.repository)
            else:
                self._require_user()
                gitrepo = 'git+ssh://%s@%s%s' % (self.user, self.host, self.repository)

            sourcedir = '%s/%s' % (scmdir, self._git_checkout_name())
            # clone without checking out (-n), then reset to the requested revision
            module_checkout_cmd = ['git', 'clone', '-n', gitrepo, sourcedir]
            update_checkout_cmd = ['git', 'reset', '--hard', self.revision]
            update_checkout_dir = sourcedir

            # self.module may be empty, in which case the specfile should be in the top-level directory
            if self.module:
                # Treat the module as a directory inside the git repository
                sourcedir = '%s/%s' % (sourcedir, self.module)

        elif self.scmtype == 'SVN':
            svnserver = '%s%s%s' % (self._transport_scheme(), self.host, self.repository)
            module_checkout_cmd = ['svn', 'checkout', '-r', self.revision, '%s/%s' % (svnserver, self.module), self.module]

        elif self.scmtype == 'SVN+SSH':
            self._require_user()
            svnserver = 'svn+ssh://%s@%s%s' % (self.user, self.host, self.repository)
            module_checkout_cmd = ['svn', 'checkout', '-r', self.revision, '%s/%s' % (svnserver, self.module), self.module]

        else:
            raise BuildError('Unknown SCM type: %s' % self.scmtype)

        # perform checkouts
        ret, output = run(module_checkout_cmd, chdir=scmdir, logfile=logfile)
        log(output)
        if ret:
            raise BuildError('Error running %s checkout command "%s": %s' %
                             (self.scmtype, ' '.join(module_checkout_cmd), output))

        if update_checkout_cmd:
            # Currently only required for GIT checkouts
            # Run the command in the directory the source was checked out into
            if self.scmtype.startswith('GIT'):
                # set the line-ending options before files are written by the reset below
                run(['git', 'config', 'core.autocrlf', 'true'], chdir=update_checkout_dir, logfile=logfile, fatal=True)
                run(['git', 'config', 'core.safecrlf', 'true'], chdir=update_checkout_dir, logfile=logfile, fatal=True)
            ret, output = run(update_checkout_cmd, chdir=update_checkout_dir, logfile=logfile)
            log(output)
            if ret:
                raise BuildError('Error running %s update command "%s": %s' %
                                 (self.scmtype, ' '.join(update_checkout_cmd), output))

        return sourcedir

############################
# End heinous copy and paste
############################

def ensuredir(path):
    """Create path, including missing parents, unless it is already a directory; return path."""
    already_present = os.path.isdir(path)
    if already_present:
        return path
    os.makedirs(path)
    return path

class WindowsBuild(object):
    """Runs one build: checks out the sources, reads the .ini spec, runs the commands, reports results."""

    def __init__(self, server, logfile):
        """
        constructor: fetch the task parameters and set the initial build properties

         - server: object providing getTaskInfo(), which must return a sequence of
           (source_url, build_tag[, task_opts])
         - logfile: open file object handed to every command that is run
        """
        self.server = server
        info = server.getTaskInfo()
        self.source_url = info[0]
        self.build_tag = info[1]
        if len(info) > 2:
            self.task_opts = info[2]
        else:
            self.task_opts = {}
        self.workdir = '/tmp/build'
        ensuredir(self.workdir)
        self.source_dir = None
        self.spec_dir = None
        self.logfile = logfile

        # we initialize these here for clarity, but they are populated in loadConfig()
        self.name = None
        self.version = None
        self.release = None
        self.description = None
        self.platform = None
        self.buildrequires = []
        self.provides = []
        self.execute = []
        self.output = {}
        self.logs = []

    def checkEnv(self):
        """Is this environment fit to build in, based on the spec file?  (not implemented)"""
        pass

    def checkout(self):
        """
        Checkout sources, specfile, and patches, and apply patches

        The specfile and patches are only checked out separately when the
        'specfile' / 'patches' task options are present; otherwise the spec
        is expected in the source checkout and no patches are applied.
        """
        src_scm = SCM(self.source_url)
        self.source_dir = src_scm.checkout(ensuredir(os.path.join(self.workdir, 'source')), self.logfile)
        if 'specfile' in self.task_opts:
            spec_scm = SCM(self.task_opts['specfile'])
            self.spec_dir = spec_scm.checkout(ensuredir(os.path.join(self.workdir, 'spec')), self.logfile)
        else:
            self.spec_dir = self.source_dir
        if 'patches' in self.task_opts:
            patch_scm = SCM(self.task_opts['patches'])
            patch_dir = patch_scm.checkout(ensuredir(os.path.join(self.workdir, 'patches')), self.logfile)
            self.applyPatches(self.source_dir, patch_dir)

    def applyPatches(self, sourcedir, patchdir):
        """
        Apply patches in patchdir to files in sourcedir

        Every regular, non-hidden file in patchdir is applied with -p1, in
        sorted filename order.  Raises BuildError if there are no patches or
        if one of them fails to apply.
        """
        patches = [patch for patch in os.listdir(patchdir)
                   if os.path.isfile(os.path.join(patchdir, patch))
                   and not patch.startswith('.')]
        if not patches:
            raise BuildError('no patches found at %s' % patchdir)
        patches.sort()
        for patch in patches:
            cmd = ['/usr/bin/patch', '--verbose', '-d', sourcedir, '-p1', '-i', os.path.join(patchdir, patch)]
            ret, output = run(cmd, logfile=self.logfile)
            if ret:
                raise BuildError('error applying patches, output was: %s' % output)

    @staticmethod
    def _split_lines(value):
        """Split a newline-separated config value into stripped, non-empty entries."""
        stripped = [line.strip() for line in value.split('\n')]
        return [line for line in stripped if line]

    def loadConfig(self):
        """
        Load build configuration from the spec file.

        Exactly one .ini file must exist in self.spec_dir.  It is read for:
         - [naming]: name, version, release, description
         - [building]: platform, buildrequires and provides (space-separated),
           execute (one command per line)
         - [files]: output (one "filename:platforms[:flags]" entry per line, with
           platforms and flags comma-separated) and logs (one filename per line)

        Raises BuildError if the .ini file is missing, ambiguous, unreadable,
        or contains an output entry without a platform field.
        """
        specfiles = [spec for spec in os.listdir(self.spec_dir) if spec.endswith('.ini')]
        if not specfiles:
            raise BuildError('No .ini file found')
        elif len(specfiles) > 1:
            raise BuildError('Multiple .ini files found')

        specpath = os.path.join(self.spec_dir, specfiles[0])
        conf = ConfigParser()
        # read() skips files it cannot open and returns the list it did parse
        if not conf.read(specpath):
            raise BuildError('Unable to read %s' % specfiles[0])

        # [naming] section
        for entry in ('name', 'version', 'release', 'description'):
            setattr(self, entry, conf.get('naming', entry))

        # [building] section
        self.platform = conf.get('building', 'platform')
        # buildrequires and provides are multi-valued (space-separated)
        for entry in ('buildrequires', 'provides'):
            getattr(self, entry).extend(conf.get('building', entry).split())
        # execute is multi-valued (newline-separated)
        self.execute.extend(self._split_lines(conf.get('building', 'execute')))

        # [files] section
        for entry in self._split_lines(conf.get('files', 'output')):
            tokens = entry.split(':')
            if len(tokens) < 2:
                # report a build error instead of failing with a bare IndexError
                raise BuildError('Invalid output entry (expected filename:platforms[:flags]): %s' % entry)
            metadata = {'platforms': tokens[1].split(',')}
            if len(tokens) > 2:
                metadata['flags'] = tokens[2].split(',')
            else:
                metadata['flags'] = []
            self.output[tokens[0]] = metadata
        self.logs.extend(self._split_lines(conf.get('files', 'logs')))

    def fetchBuildReqs(self):
        """Retrieve buildrequires listed in the spec file  (not implemented)"""
        pass

    def build(self):
        """
        Do the build: run the execute line(s)

        The commands are written to a temporary script which is run with
        "bash -e -x" from self.source_dir.  The script is removed afterwards.
        Raises BuildError if the script exits with a non-zero status.
        """
        tmpfd, tmpname = tempfile.mkstemp(prefix='koji-tmp.', dir='/tmp')
        try:
            script = os.fdopen(tmpfd, 'w')
            try:
                for cmd in self.execute:
                    script.write(cmd)
                    script.write('\n')
            finally:
                script.close()
            cmd = ['/bin/bash', '-e', '-x', tmpname]
            ret, output = run(cmd, chdir=self.source_dir, logfile=self.logfile)
        finally:
            # don't leave the generated script behind, whether or not the build worked
            os.unlink(tmpname)
        if ret:
            raise BuildError('Build command failed, see %s for details' %
                             os.path.basename(self.logfile.name))

    def virusCheck(self):
        """Check the build output for viruses  (not implemented)"""
        pass

    def gatherResults(self):
        """Gather information about the output from the build, return it as a dict"""
        return {'name': self.name, 'version': self.version, 'release': self.release,
                'description': self.description, 'platform': self.platform,
                'provides': self.provides,
                'output': self.output, 'logs': self.logs}

    def run(self):
        """Run the entire build process and return the dict built by gatherResults()"""
        self.checkEnv()
        self.checkout()
        self.loadConfig()
        self.fetchBuildReqs()
        self.build()
        self.virusCheck()
        return self.gatherResults()

def log(msg):
    """Write msg to stderr, prefixed with the current local time."""
    timestamp = datetime.datetime.now().ctime()
    print >> sys.stderr, '%s: %s' % (timestamp, msg)

def run(cmd, chdir=None, logfile=None, fatal=False):
    """
    Run a command and capture its combined stdout/stderr.

     - cmd: argument list, or a string (a string containing more than one
       word is run through the shell)
     - chdir: if set, directory to change into while the command runs; the
       previous working directory is restored afterwards
     - logfile: if set, file object that receives the command line and its output
     - fatal: if true, raise BuildError when the command exits non-zero

    Returns a (return code, output) tuple.
    """
    is_string = isinstance(cmd, (str, unicode))
    shell = is_string and len(cmd.split()) > 1
    olddir = None
    if chdir:
        olddir = os.getcwd()
        os.chdir(chdir)
    try:
        log('running command: %s' % cmd)
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                                close_fds=True, shell=shell)
        # communicate() drains the pipe while waiting; calling wait() before
        # reading can deadlock once the child fills the pipe buffer
        output = proc.communicate()[0]
        ret = proc.returncode
    finally:
        # restore the working directory even if the command could not be started
        if olddir:
            os.chdir(olddir)
    if logfile:
        logfile.write('$ ')
        if is_string:
            logfile.write(cmd)
        else:
            logfile.write(' '.join(cmd))
        logfile.write('\n')
        logfile.write(output)
        logfile.write('\n')
    if ret and fatal:
        raise BuildError('error running: %s, return code was %s, output was: %s' % (cmd, ret, output))
    return ret, output

def find_net_info():
    """
    Find the MAC address and network gateway configured for this VM.

    Parses the output of "ipconfig /all" and returns a (macaddr, gateway)
    tuple.  Each element is the first plausible value found, or None if there
    was none.  The MAC address is returned lower-case and colon-separated.

    Raises RuntimeError if ipconfig exits with a non-zero status.
    """
    ret, output = run(['ipconfig', '/all'])
    if ret:
        raise RuntimeError('error running ipconfig, output was: %s' % output)
    macaddr = None
    gateway = None
    for line in output.splitlines():
        line = line.strip()
        # take the first valid values we find; candidates are validated before
        # being accepted so that an unusable entry (e.g. an adapter with no
        # gateway configured) does not hide a usable one further down
        if line.startswith('Physical Address'):
            if not macaddr:
                # format it to be consistent with the libvirt MAC address
                candidate = line.split()[-1].replace('-', ':').lower()
                if len(candidate) == 17:
                    macaddr = candidate
        elif line.startswith('Default Gateway'):
            if not gateway:
                candidate = line.split()[-1]
                # length bounds of a dotted-quad address: "1.1.1.1" .. "255.255.255.255"
                if 7 <= len(candidate) <= 15:
                    gateway = candidate
    return macaddr, gateway

def upload_file(server, prefix, path):
    """
    upload a single file to the vmd

     - server: object providing upload(path, offset, data) and
       verifyChecksum(path, digest, type)
     - prefix: local directory containing the file
     - path: path of the file relative to prefix; also the name sent to the server

    The file is sent in base64-encoded chunks of up to 131072 bytes, each
    with the byte offset it starts at.  Afterwards the server is asked to
    verify the SHA-1 checksum of the data that was read.
    """
    # binary mode: the bytes uploaded and checksummed must be exactly those on disk
    fobj = open(os.path.join(prefix, path), 'rb')
    try:
        offset = 0
        digest = hashlib.sha1()
        while True:
            data = fobj.read(131072)
            if not data:
                break
            server.upload(path, offset, base64.b64encode(data))
            offset += len(data)
            digest.update(data)
    finally:
        fobj.close()
    server.verifyChecksum(path, digest.hexdigest(), 'sha1')

def get_mgmt_server():
    """
    Get a ServerProxy object we can use to retrieve task info

    Polls find_net_info() every 5 seconds until both a MAC address and a
    gateway are available, asks the manager on the gateway for the port
    assigned to this MAC address, and returns a proxy for that port.
    """
    while True:
        macaddr, gateway = find_net_info()
        if macaddr and gateway:
            break
        # wait for the network connection to come up and get an address
        time.sleep(5)

    log('found MAC address %s, connecting to %s:%s' %
        (macaddr, gateway, MANAGER_PORT))
    manager_url = 'http://%s:%s/' % (gateway, MANAGER_PORT)
    manager = xmlrpclib.ServerProxy(manager_url, allow_none=True)
    # we would set a timeout on the socket here, but that is apparently not
    # supported by python/cygwin/Windows
    task_port = manager.getPort(macaddr)
    log('found task-specific port %s' % task_port)

    task_url = 'http://%s:%s/' % (gateway, task_port)
    return xmlrpclib.ServerProxy(task_url, allow_none=True)

def get_options():
    """Parse sys.argv and return the options object (attributes: install, uninstall)"""
    usage = """%prog [options]
    Run Koji tasks assigned to a VM.
    Run without any arguments to start this daemon.
    """
    parser = OptionParser(usage=usage)
    # (short flag, long flag, help text) for each boolean switch
    switches = (
        ('-i', '--install', 'Install this daemon as a service'),
        ('-u', '--uninstall', 'Uninstall this daemon if it was installed previously as a service'),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_option(short_flag, long_flag, default=False,
                          action='store_true', help=help_text)
    parsed = parser.parse_args()
    # positional arguments are ignored
    return parsed[0]

def flunk(server, logfile):
    """
    do the right thing when a build fails

    Must be called from an exception handler.  Logs the traceback of the
    exception being handled, then, if a server connection exists, uploads
    the build log (when there is one) and reports the traceback to the
    server with failTask().  Always exits the process with status 1.
    """
    exc_info = sys.exc_info()
    tb = ''.join(traceback.format_exception(*exc_info))
    # log locally first so the traceback is recorded even if the server
    # cannot be reached below
    log(tb)
    if server is not None:
        if logfile is not None:
            # uploading the log is best-effort: a failure here must not
            # prevent the task from being marked as failed
            try:
                logfile.close()
                upload_file(server, os.path.dirname(logfile.name),
                            os.path.basename(logfile.name))
            except Exception:
                log('unable to upload %s: %s' % (logfile.name, traceback.format_exc()))
        try:
            server.failTask(tb)
        except Exception:
            log('unable to report the failure to the server: %s' % traceback.format_exc())
    sys.exit(1)

if __name__ == '__main__':
    # the service is registered and removed under the name this script was invoked as
    prog = os.path.basename(sys.argv[0])
    opts = get_options()
    if opts.install:
        # Resolve the script from the path it was invoked with.  Resolving
        # only its basename would give the right answer solely when the
        # current directory happens to be the one containing the script.
        script_path = os.path.abspath(sys.argv[0])
        ret, output = run(['cygrunsrv', '--install', prog,
                           '--path', sys.executable, '--args', script_path,
                           '--type', 'auto', '--dep', 'Dhcp',
                           '--disp', 'Koji Windows Daemon',
                           '--desc', 'Runs Koji tasks assigned to a VM'])
        if ret:
            print('Error installing %s service, output was: %s' % (prog, output))
            sys.exit(1)
        else:
            print('Successfully installed the %s service' % prog)
            sys.exit(0)
    elif opts.uninstall:
        ret, output = run(['cygrunsrv', '--remove', prog])
        if ret:
            print('Error removing the %s service, output was: %s' % (prog, output))
            sys.exit(1)
        else:
            print('Successfully removed the %s service' % prog)
            sys.exit(0)

    # normal operation: run the build assigned to this VM and report back
    server = None
    logfile = None
    try:
        logfile = open('/tmp/build.log', 'w')
        server = get_mgmt_server()
        build = WindowsBuild(server, logfile)
        results = build.run()
        # close the log before uploading so that everything has been written out
        logfile.close()

        upload_file(server, '/tmp', 'build.log')
        # output and log files named in the spec are looked up relative to the source checkout
        for filename in list(results['output']) + results['logs']:
            upload_file(server, build.source_dir, filename)
        results['logs'].append('build.log')
        server.closeTask(results)
        log('Build results: %s' % results)
    except:
        # any failure at all (flunk() exits with status 1)
        flunk(server, logfile)
    sys.exit(0)