straighten out finding primary.xml file

Signed-off-by: Nils Philippsen <nils@redhat.com>
This commit is contained in:
Nils Philippsen 2019-11-25 17:55:10 +01:00
parent 399bb28420
commit 6e574a5067

View file

@@ -19,6 +19,7 @@ import time
from defusedxml import cElementTree as etree
import requests
from xml.etree.ElementTree import ParseError
KOJI_REPO = 'https://kojipkgs.fedoraproject.org/repos/'
@@ -99,32 +100,39 @@ def get_primary_xml(destfolder, url, name):
repomd_url = url + '/repomd.xml'
response = requests.get(repomd_url, verify=True)
if not bool(response):
print('%s !! Failed to get %r %r' % (
name.ljust(12), repomd_url, response))
log.warning('%-12s !! Failed to get %s %s', name, repomd_url, response)
return
# Parse the xml doc and get a list of locations and their shasum.
files = ((
node.find('repo:location', repomd_xml_namespace),
node.find('repo:open-checksum', repomd_xml_namespace),
) for node in etree.fromstring(response.text))
# Extract out the attributes that we're really interested in.
files = (
(f.attrib['href'].replace('repodata/', ''), s.text, s.attrib['type'])
for f, s in files if f is not None and s is not None
)
# Filter down to only the primary.xml files
files = [(f, s, t) for f, s, t in files if 'primary.xml' in f]
if not files:
log.debug('No primary.xml could be found in %s' % url)
elif len(files) > 1:
log.debug("More than one primary.xml could be found in %s" % url)
try:
root = etree.fromstring(response.text)
except ParseError:
log.warning('%-12s !! Failed to parse %s %s', name, repomd_url, response)
return
filename, shasum, shatype = files[0]
data_nodes = list(root.findall('repo:data[@type="primary"]', repomd_xml_namespace))
if not data_nodes:
log.debug('No primary.xml could be found in %s', url)
return
elif len(data_nodes) > 1:
log.debug("More than one primary.xml could be found in %s", url)
return
primary_node = data_nodes[0]
location_node = primary_node.find('repo:location', repomd_xml_namespace)
if location_node is None or 'href' not in location_node.attrib:
log.debug('No valid location found for primary.xml in %s', url)
return
cksuminfo_node = primary_node.find('repo:open-checksum', repomd_xml_namespace)
if cksuminfo_node is None or 'type' not in cksuminfo_node.attrib:
log.debug('No valid checksum information found for primary.xml in %s', url)
return
filename = location_node.attrib['href'].replace('repodata/', '')
hash_digest = cksuminfo_node.text
hash_type = cksuminfo_node.attrib['type']
repomd_url = url + '/' + filename
# First, determine if the file has changed by comparing hash