From 6e574a50673c0b28dc7a642c82c6d3bb5440f8db Mon Sep 17 00:00:00 2001
From: Nils Philippsen <nils@redhat.com>
Date: Mon, 25 Nov 2019 17:55:10 +0100
Subject: [PATCH] straighten out finding primary.xml file

Signed-off-by: Nils Philippsen <nils@redhat.com>
---
 distgit_bugzilla_sync/package_summaries.py | 52 +++++++++++++---------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/distgit_bugzilla_sync/package_summaries.py b/distgit_bugzilla_sync/package_summaries.py
index bfbf474..1ad2e82 100644
--- a/distgit_bugzilla_sync/package_summaries.py
+++ b/distgit_bugzilla_sync/package_summaries.py
@@ -19,6 +19,7 @@ import time
 
 from defusedxml import cElementTree as etree
 import requests
+from xml.etree.ElementTree import ParseError
 
 
 KOJI_REPO = 'https://kojipkgs.fedoraproject.org/repos/'
@@ -99,32 +100,39 @@ def get_primary_xml(destfolder, url, name):
     repomd_url = url + '/repomd.xml'
     response = requests.get(repomd_url, verify=True)
     if not bool(response):
-        print('%s !! Failed to get %r %r' % (
-            name.ljust(12), repomd_url, response))
+        log.warning('%-12s !! Failed to get %s %s', name, repomd_url, response)
         return
 
-    # Parse the xml doc and get a list of locations and their shasum.
-    files = ((
-        node.find('repo:location', repomd_xml_namespace),
-        node.find('repo:open-checksum', repomd_xml_namespace),
-    ) for node in etree.fromstring(response.text))
-
-    # Extract out the attributes that we're really interested in.
-    files = (
-        (f.attrib['href'].replace('repodata/', ''), s.text, s.attrib['type'])
-        for f, s in files if f is not None and s is not None
-    )
-
-    # Filter down to only the primary.xml files
-    files = [(f, s, t) for f, s, t in files if 'primary.xml' in f]
-
-    if not files:
-        log.debug('No primary.xml could be found in %s' % url)
-    elif len(files) > 1:
-        log.debug("More than one primary.xml could be found in %s" % url)
+    try:
+        root = etree.fromstring(response.text)
+    except ParseError:
+        log.warning('%-12s !! Failed to parse %s %s', name, repomd_url, response)
         return
 
-    filename, shasum, shatype = files[0]
+    data_nodes = list(root.findall('repo:data[@type="primary"]', repomd_xml_namespace))
+    if not data_nodes:
+        log.debug('No primary.xml could be found in %s', url)
+        return
+    elif len(data_nodes) > 1:
+        log.debug("More than one primary.xml could be found in %s", url)
+        return
+
+    primary_node = data_nodes[0]
+
+    location_node = primary_node.find('repo:location', repomd_xml_namespace)
+    if location_node is None or 'href' not in location_node.attrib:
+        log.debug('No valid location found for primary.xml in %s', url)
+        return
+
+    cksuminfo_node = primary_node.find('repo:open-checksum', repomd_xml_namespace)
+    if cksuminfo_node is None or 'type' not in cksuminfo_node.attrib:
+        log.debug('No valid checksum information found for primary.xml in %s', url)
+        return
+
+    filename = location_node.attrib['href'].replace('repodata/', '')
+    hash_digest = cksuminfo_node.text
+    hash_type = cksuminfo_node.attrib['type']
+
     repomd_url = url + '/' + filename
 
     # First, determine if the file has changed by comparing hash