debian-koji/builder/mergerepos
Tomas Kopecek c6e69b4f8b backport py27 compatible file open with encoding
The client and builder need to run on py27, which doesn't support
open(encoding='utf-8')

Related: https://pagure.io/koji/issue/2641
2021-02-16 14:13:03 +01:00

370 lines
15 KiB
Python
Executable file

#!/usr/bin/python2
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2009-2014 Red Hat, Inc.
# Written by Mike Bonnet <mikeb@redhat.com>
# Merge repos using rules specific to Koji
# Largely borrowed from the mergerepo script included in createrepo and
# written by Seth Vidal
from __future__ import absolute_import
import os.path
import shutil
import sys
import tempfile
from optparse import OptionParser
import createrepo
import rpmUtils.miscutils
import yum
import yum.misc
import koji
# Maps a canonical arch to the extra arches that should also be included in
# the repo for it (basically the inverse of koji.canonArch()).
# The lists are taken from rpmUtils.arch.
EXPAND_ARCHES = {
    'i386': ['i486', 'i586', 'geode', 'i686', 'athlon'],
    'x86_64': ['ia32e', 'amd64'],
    'ppc64': ['ppc64pseries', 'ppc64iseries'],
    'sparc64': ['sparc64v', 'sparc64v2'],
    'sparc': ['sparcv8', 'sparcv9', 'sparcv9v', 'sparcv9v2'],
    'alpha': [
        'alphaev4', 'alphaev45', 'alphaev5', 'alphaev56',
        'alphapca56', 'alphaev6', 'alphaev67', 'alphaev68', 'alphaev7',
    ],
    'armhfp': ['armv7hl', 'armv7hnl', 'armv6hl', 'armv6hnl'],
    'arm': ['armv5tel', 'armv5tejl', 'armv6l', 'armv7l'],
    'sh4': ['sh4a'],
}
# Maps an arch to the companion arch that parse_args() additionally includes
# (along with that companion's EXPAND_ARCHES entries) to support multilib
# repos.
MULTILIB_ARCHES = {
    'x86_64': 'i386',
    'ppc64': 'ppc',
    's390x': 's390',
}
def parse_args(args):
    """Parse the mergerepos command line.

    :param args: list of argument strings (typically ``sys.argv[1:]``)
    :returns: the optparse values object. ``opts.arches`` is expanded with
        the matching EXPAND_ARCHES / MULTILIB_ARCHES entries, always
        contains 'noarch', and holds each arch only once (first occurrence
        wins, so the relative order is unchanged).

    Exits with status 1 (after printing the usage) when no --repo is given,
    and via ``parser.error()`` (status 2) when --outputdir is missing.
    """
    usage = """
mergerepos: take 2 or more repositories and merge their metadata into a new
repo using Koji semantics
mergerepos --repo=url --repo=url --outputdir=/some/path"""
    parser = OptionParser(version="mergerepos 0.1", usage=usage)
    # query options
    parser.add_option("-r", "--repo", dest="repos", default=[], action="append",
                      help="repo url")
    parser.add_option("-g", "--groupfile", default=None,
                      help="path to groupfile to include in metadata")
    parser.add_option("-a", "--arch", dest="arches", default=[], action="append",
                      help="List of arches to include in the repo")
    parser.add_option("-b", "--blocked", default=None,
                      help="A file containing a list of srpm names to exclude "
                           "from the merged repo")
    parser.add_option("--mode", default='koji', help="Select the merge mode")
    parser.add_option("-o", "--outputdir", default=None,
                      help="Location to create the repository")
    parser.add_option("--tempdir", default=None,
                      help="Location for temporary files")
    opts = parser.parse_args(args)[0]

    if not opts.repos:
        parser.print_usage()
        sys.exit(1)

    arches = []

    def add_arch(arch):
        # keep the first occurrence only, preserving order
        if arch not in arches:
            arches.append(arch)

    requested = list(opts.arches)
    for arch in requested:
        add_arch(arch)

    # expand arches
    for arch in requested:
        for extra in EXPAND_ARCHES.get(arch, ()):
            add_arch(extra)

    # support multilib repos; iterate over a snapshot so that arches added
    # here are not themselves treated as multilib candidates
    for arch in list(arches):
        multilib_arch = MULTILIB_ARCHES.get(arch)
        if multilib_arch:
            add_arch(multilib_arch)
            for extra in EXPAND_ARCHES.get(multilib_arch, ()):
                add_arch(extra)

    # always include noarch
    add_arch('noarch')
    opts.arches = arches

    if not opts.outputdir:
        # parser.error() prints the message and exits with status 2 itself
        parser.error('You must specify an outputdir with -o')

    return opts
def make_const_func(value):
    """Return a function that returns the given value.

    The returned callable ignores whatever positional or keyword arguments
    it is called with, so it can stand in for a method of any signature.
    """
    def const_func(*args, **kwargs):
        return value
    return const_func
class RepoMerge(object):
    """Merge a list of yum repositories into a single set of repo metadata.

    Typical use is ``merge_repos()`` followed by ``write_metadata()``, with
    ``close()`` called afterwards to release the yum object and remove the
    temporary cache directory created by the constructor.
    """

    def __init__(self, repolist, arches, groupfile, blocked, outputdir,
                 tempdir=None, mode='koji'):
        """
        :param repolist: repo urls, in priority order (first is highest)
        :param arches: arch list handed to yum when the sacks are set up
        :param groupfile: groupfile path stored in the createrepo config
        :param blocked: container of srpm names to exclude (only ``in``
            tests are performed against it)
        :param outputdir: directory the merged repodata is written to
        :param tempdir: parent directory for the temporary yum cachedir
        :param mode: 'simple' selects do_simple_sort(); any other value
            selects the koji srpm-based filtering
        """
        # Assigned first so that close()/__del__ stay safe even when one of
        # the setup steps below raises.
        self.yumbase = None
        self.mdconf = None
        self.repolist = repolist
        self.outputdir = outputdir
        self.tempdir = tempdir
        self.mdconf = createrepo.MetaDataConfig()
        # explicitly request sha1 for backward compatibility with older yum
        self.mdconf.sumtype = 'sha1'
        self.mdconf.database = True
        self.mdconf.verbose = True
        self.mdconf.changelog_limit = 3
        self.yumbase = yum.YumBase()
        if hasattr(self.yumbase, 'preconf'):
            self.yumbase.preconf.fn = '/dev/null'
            self.yumbase.preconf.init_plugins = False
            self.yumbase.preconf.debuglevel = 2
        else:
            self.yumbase._getConfig('/dev/null', init_plugins=False, debuglevel=2)
        self.yumbase.conf.cachedir = tempfile.mkdtemp(dir=self.tempdir)
        self.yumbase.conf.cache = 0
        self.archlist = arches
        self.mdconf.groupfile = groupfile
        self.blocked = blocked
        self.mode = mode

    def close(self):
        """Close the yum object and delete its cache directory.

        Safe to call more than once, and on an instance whose constructor
        did not complete. The cache directory is removed even when closing
        the yum object raises; that exception still propagates.
        """
        yumbase = getattr(self, 'yumbase', None)
        if yumbase is None:
            return
        cachedir = yumbase.conf.cachedir
        self.yumbase = None
        self.mdconf = None
        try:
            yumbase.close()
        finally:
            if os.path.isdir(cachedir):
                shutil.rmtree(cachedir)

    def __del__(self):
        self.close()

    def merge_repos(self):
        """Enable every repo from the repolist, load the sacks and filter them."""
        self.yumbase.repos.disableRepo('*')
        # add our repos and give them a merge rank in the order they appear
        # in the repolist
        for rank, url in enumerate(self.repolist, 1):
            sys.stderr.write('Adding repo: %s\n' % url)
            repo = self.yumbase.add_enable_repo('repo%s' % rank, baseurls=[url])
            repo._merge_rank = rank
        # setup our sacks
        self.yumbase._getSacks(archlist=self.archlist)
        self.sort_and_filter()

    def _ranked_repos(self):
        """Return the enabled repos sorted by _merge_rank.

        The lowest number is the highest rank (1st place, 2nd place, etc.).
        """
        repos = self.yumbase.repos.listEnabled()
        repos.sort(key=lambda o: o._merge_rank)
        return repos

    @staticmethod
    def _force_relative_location(pkg):
        """Make the package object report its relative remote_path as location.

        XXX - kind of a hack, but yum leaves us little choice
        """
        loc = """<location href="%s"/>\n""" % yum.misc.to_xml(pkg.remote_path,
                                                              attrib=True)
        pkg._return_remote_location = make_const_func(loc)

    def sort_and_filter(self):
        """
        For each package object, check if the srpm name has ever been seen before.
        If it has not, keep the package. If it has, check if the srpm name was first seen
        in the same repo as the current package. If so, keep the package from the srpm with the
        highest NVR. If not, keep the packages from the first srpm we found, and delete packages
        from all other srpms.

        Packages with matching NVRs in multiple repos will be taken from the first repo.

        If the srpm name appears in the blocked package list, any packages generated from the srpm
        will be deleted from the package sack as well.

        This method will also generate a file called "pkgorigins" and add it to the repo metadata.
        This is a tab-separated map of package E:N-V-R.A to repo URL (as specified on the
        command-line). This allows a package to be tracked back to its origin, even if the location
        field in the repodata does not match the original repo location.
        """
        if self.mode == 'simple':
            return self.do_simple_sort()

        repos = self._ranked_repos()
        split_filename = rpmUtils.miscutils.splitFilename

        # calculating what "builds" (srpms) we're allowing into the repo
        # include_srpms maps srpm name -> (srpm filename, repoid)
        include_srpms = {}
        seen_srpms = set()
        for reponum, repo in enumerate(repos):
            for pkg in repo.sack:
                if reponum == 0 and not pkg.basepath:
                    # this is the first repo (i.e. the koji repo) and appears
                    # to be using relative urls
                    self._force_relative_location(pkg)
                if pkg.sourcerpm in seen_srpms:
                    # we're just looking at sourcerpms this pass and we've
                    # already seen this one
                    continue
                seen_srpms.add(pkg.sourcerpm)
                srpm_name, ver, rel, epoch, arch = split_filename(pkg.sourcerpm)
                if srpm_name in include_srpms:
                    other_srpm, other_repoid = include_srpms[srpm_name]
                    if pkg.repoid != other_repoid:
                        # We found a rpm built from an srpm with the same name in a
                        # previous repo. The previous repo takes precedence, so ignore
                        # the srpm found here.
                        sys.stderr.write('Package %s already provided by repo %s'
                                         ' (at %s in repo %s)\n'
                                         % (srpm_name, other_repoid, str(pkg), pkg.repoid))
                        continue
                    # We're in the same repo, so compare srpm NVRs
                    other_srpm_name, other_ver, other_rel, other_epoch, other_arch = \
                        split_filename(other_srpm)
                    evr_cmp = rpmUtils.miscutils.compareEVR(
                        (epoch, ver, rel), (other_epoch, other_ver, other_rel))
                    if evr_cmp > 0:
                        # The current package we're processing is from a newer srpm
                        # than the existing srpm in the dict, so update the dict
                        include_srpms[srpm_name] = (pkg.sourcerpm, pkg.repoid)
                        sys.stderr.write('Replacing older source nvr: '
                                         '%s > %s\n' % (pkg.sourcerpm, other_srpm))
                    elif evr_cmp < 0:
                        sys.stderr.write('Ignoring older source nvr: '
                                         '%s < %s\n' % (pkg.sourcerpm, other_srpm))
                    # otherwise same, so we already have it
                elif srpm_name in self.blocked:
                    sys.stderr.write('Ignoring blocked package: %s\n\n' %
                                     pkg.sourcerpm)
                    continue
                else:
                    include_srpms[srpm_name] = (pkg.sourcerpm, pkg.repoid)

        pkgorigins = os.path.join(self.yumbase.conf.cachedir, 'pkgorigins')
        seen_rpms = set()
        # the with-block guarantees the file is closed even if a sack
        # operation raises part-way through
        with koji._open_text_file(pkgorigins, 'wt') as origins:
            for repo in repos:
                for pkg in repo.sack:
                    srpm_name = split_filename(pkg.sourcerpm)[0]
                    incl_srpm, incl_repoid = include_srpms.get(srpm_name, (None, None))
                    pkg_nvra = str(pkg)
                    if pkg_nvra in seen_rpms:
                        sys.stderr.write('Duplicate rpm: %s\n' % pkg_nvra)
                    elif incl_srpm is None:
                        sys.stderr.write('Excluding %s (%s is blocked)\n'
                                         % (pkg_nvra, srpm_name))
                        repo.sack.delPackage(pkg)
                    elif incl_srpm == pkg.sourcerpm:
                        origins.write('%s\t%s\n' % (pkg_nvra, repo.urls[0]))
                        seen_rpms.add(pkg_nvra)
                    else:
                        sys.stderr.write('Excluding %s (wrong srpm version '
                                         '%s != %s)\n' % (pkg_nvra, pkg.sourcerpm, incl_srpm))
                        repo.sack.delPackage(pkg)
        self.mdconf.additional_metadata['origin'] = pkgorigins

    def do_simple_sort(self):
        """
        Handle the 'sort_and_filter' case when mode=simple

        As the name implies, this is a much simpler approach. Mainly, we need
        to generate the pkgorigins file.

        We lack the complex filtration of mode=koji, but we still need to:
          - fix urls for primary repo
          - enforce blocked list
        """
        repos = self._ranked_repos()

        for reponum, repo in enumerate(repos):
            for pkg in repo.sack:
                if reponum == 0 and not pkg.basepath:
                    # this is the first repo (i.e. the koji repo) and appears
                    # to be using relative urls
                    self._force_relative_location(pkg)

        pkgorigins = os.path.join(self.yumbase.conf.cachedir, 'pkgorigins')
        seen_rpms = set()
        with koji._open_text_file(pkgorigins, 'wt') as origins:
            for repo in repos:
                for pkg in repo.sack:
                    srpm_name = rpmUtils.miscutils.splitFilename(pkg.sourcerpm)[0]
                    pkg_nvra = str(pkg)
                    if pkg_nvra in seen_rpms:
                        sys.stderr.write('Duplicate rpm: %s\n' % pkg_nvra)
                        # note: we warn, but do not omit it
                    if srpm_name in self.blocked:
                        sys.stderr.write('Ignoring blocked package: %s\n\n' %
                                         pkg.sourcerpm)
                        repo.sack.delPackage(pkg)
                        # a package removed from the sack must not be listed
                        # in pkgorigins either
                        continue
                    if pkg_nvra not in seen_rpms:
                        origins.write('%s\t%s\n' % (pkg_nvra, repo.urls[0]))
                        seen_rpms.add(pkg_nvra)
        self.mdconf.additional_metadata['origin'] = pkgorigins

    def write_metadata(self):
        """Generate the repodata for the merged package sack in outputdir.

        Any existing ``repodata`` directory under outputdir is removed first,
        and outputdir itself is created when it does not exist.
        """
        self.mdconf.pkglist = self.yumbase.pkgSack
        self.mdconf.directory = self.outputdir
        # clean out what was there
        repodata = os.path.join(self.mdconf.directory, 'repodata')
        if os.path.exists(repodata):
            shutil.rmtree(repodata)
        if not os.path.exists(self.mdconf.directory):
            os.makedirs(self.mdconf.directory)
        mdgen = createrepo.MetaDataGenerator(config_obj=self.mdconf)
        mdgen.doPkgMetadata()
        mdgen.doRepoMetadata()
        mdgen.doFinalMove()
def main(args):
    """Run a repo merge for the given command-line arguments.

    :param args: list of argument strings (without the program name)

    When a blocked file is given, each non-empty line (surrounding
    whitespace stripped) is taken as an srpm name to exclude. The merge
    object is always closed, even when merging or writing metadata fails.
    """
    opts = parse_args(args)

    blocked = {}
    if opts.blocked:
        with koji._open_text_file(opts.blocked) as blocked_fo:
            for line in blocked_fo:
                name = line.strip()
                # skip blank lines so that an empty name is never blocked
                if name:
                    blocked[name] = 1

    merge = RepoMerge(opts.repos, opts.arches, opts.groupfile, blocked,
                      opts.outputdir, opts.tempdir, opts.mode)
    try:
        merge.merge_repos()
        merge.write_metadata()
    finally:
        merge.close()


if __name__ == "__main__":
    main(sys.argv[1:])