public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Brian Dolbec" <dolsen@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/portage:master commit in: pym/repoman/modules/scan/metadata/
Date: Sun, 15 May 2016 23:51:12 +0000 (UTC)	[thread overview]
Message-ID: <1463250580.633cabfb9215633c554fd967b9875310be3718bd.dolsen@gentoo> (raw)
Message-ID: <20160515235112.pnb7RWsyxdHto5Pr7ItS7RMRRLjRlIajVYXRM1i7Icc@z> (raw)

commit:     633cabfb9215633c554fd967b9875310be3718bd
Author:     Dirkjan Ochtman <dirkjan <AT> ochtman <DOT> nl>
AuthorDate: Tue May  3 07:18:05 2016 +0000
Commit:     Brian Dolbec <dolsen <AT> gentoo <DOT> org>
CommitDate: Sat May 14 18:29:40 2016 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=633cabfb

repoman: Use lxml for parsing of metadata

Note that we no longer throw a QA error for a missing XML prolog, as long as
the encoding matches the default ('UTF-8'; lowercase is also allowed).
Update the import error message to refer to the dev-python/lxml package.

 pym/repoman/modules/scan/metadata/pkgmetadata.py | 46 +++++++-----------------
 1 file changed, 13 insertions(+), 33 deletions(-)

diff --git a/pym/repoman/modules/scan/metadata/pkgmetadata.py b/pym/repoman/modules/scan/metadata/pkgmetadata.py
index bcddb3e..317ab56 100644
--- a/pym/repoman/modules/scan/metadata/pkgmetadata.py
+++ b/pym/repoman/modules/scan/metadata/pkgmetadata.py
@@ -7,14 +7,14 @@ import sys
 from itertools import chain
 
 try:
-	import xml.etree.ElementTree
-	from xml.parsers.expat import ExpatError
+	from lxml import etree
+	from lxml.etree import ParserError
 except (SystemExit, KeyboardInterrupt):
 	raise
 except (ImportError, SystemError, RuntimeError, Exception):
 	# broken or missing xml support
 	# http://bugs.python.org/issue14988
-	msg = ["Please enable python's \"xml\" USE flag in order to use repoman."]
+	msg = ["Please emerge dev-python/lxml in order to use repoman."]
 	from portage.output import EOutput
 	out = EOutput()
 	for line in msg:
@@ -26,12 +26,11 @@ from repoman._portage import portage
 from repoman.metadata import metadata_dtd_uri
 from repoman.checks.herds.herdbase import get_herd_base
 from repoman.checks.herds.metadata import check_metadata, UnknownHerdsError
-from repoman._xml import _XMLParser, _MetadataTreeBuilder, XmlLint
+from repoman._xml import XmlLint
 from repoman.modules.scan.scanbase import ScanBase
 
 from portage.exception import InvalidAtom
 from portage import os
-from portage import _encodings, _unicode_encode
 from portage import exception
 from portage.dep import Atom
 
@@ -145,50 +144,31 @@ class PkgMetadata(ScanBase, USEFlagChecks):
 
 		# metadata.xml parse check
 		metadata_bad = False
-		xml_info = {}
-		xml_parser = _XMLParser(xml_info, target=_MetadataTreeBuilder())
 
 		# read metadata.xml into memory
 		try:
-			_metadata_xml = xml.etree.ElementTree.parse(
-				_unicode_encode(
-					os.path.join(checkdir, "metadata.xml"),
-					encoding=_encodings['fs'], errors='strict'),
-				parser=xml_parser)
-		except (ExpatError, SyntaxError, EnvironmentError) as e:
+			_metadata_xml = etree.parse(os.path.join(checkdir, 'metadata.xml'))
+		except (ParserError, SyntaxError, EnvironmentError) as e:
 			metadata_bad = True
 			self.qatracker.add_error("metadata.bad", "%s/metadata.xml: %s" % (xpkg, e))
 			del e
 			self.muselist = frozenset(self.musedict)
 			return False
 
-		if "XML_DECLARATION" not in xml_info:
+		xml_encoding = _metadata_xml.docinfo.encoding
+		if xml_encoding.upper() != metadata_xml_encoding:
 			self.qatracker.add_error(
 				"metadata.bad", "%s/metadata.xml: "
-				"xml declaration is missing on first line, "
-				"should be '%s'" % (xpkg, metadata_xml_declaration))
-		else:
-			xml_version, xml_encoding, xml_standalone = \
-				xml_info["XML_DECLARATION"]
-			if xml_encoding is None or \
-				xml_encoding.upper() != metadata_xml_encoding:
-				if xml_encoding is None:
-					encoding_problem = "but it is undefined"
-				else:
-					encoding_problem = "not '%s'" % xml_encoding
-				self.qatracker.add_error(
-					"metadata.bad", "%s/metadata.xml: "
-					"xml declaration encoding should be '%s', %s" %
-					(xpkg, metadata_xml_encoding, encoding_problem))
+				"xml declaration encoding should be '%s', not '%s'" %
+				(xpkg, metadata_xml_encoding, xml_encoding))
 
-		if "DOCTYPE" not in xml_info:
+		if not _metadata_xml.docinfo:
 			metadata_bad = True
 			self.qatracker.add_error(
 				"metadata.bad",
 				"%s/metadata.xml: %s" % (xpkg, "DOCTYPE is missing"))
 		else:
-			doctype_name, doctype_system, doctype_pubid = \
-				xml_info["DOCTYPE"]
+			doctype_system = _metadata_xml.docinfo.system_url
 			if doctype_system != metadata_dtd_uri:
 				if doctype_system is None:
 					system_problem = "but it is undefined"
@@ -198,7 +178,7 @@ class PkgMetadata(ScanBase, USEFlagChecks):
 					"metadata.bad", "%s/metadata.xml: "
 					"DOCTYPE: SYSTEM should refer to '%s', %s" %
 					(xpkg, metadata_dtd_uri, system_problem))
-
+			doctype_name = _metadata_xml.docinfo.doctype.split(' ')[1]
 			if doctype_name != metadata_doctype_name:
 				self.qatracker.add_error(
 					"metadata.bad", "%s/metadata.xml: "


             reply	other threads:[~2016-05-15 23:51 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-14 18:33 Brian Dolbec [this message]
2016-05-15 23:51 ` [gentoo-commits] proj/portage:master commit in: pym/repoman/modules/scan/metadata/ Brian Dolbec
  -- strict thread matches above, loose matches on Subject: below --
2016-05-14 18:33 [gentoo-commits] proj/portage:repoman " Brian Dolbec
2016-05-14 18:33 Brian Dolbec
2016-05-14 18:33 Brian Dolbec
2016-05-14 18:33 Brian Dolbec
2016-05-14 18:33 Brian Dolbec
2016-05-14 18:33 Brian Dolbec
2016-05-14 18:33 Brian Dolbec
2016-05-14 18:33 Brian Dolbec
2016-05-14 18:33 Brian Dolbec
2016-05-08 21:21 Brian Dolbec
2016-05-08 21:21 Brian Dolbec
2016-05-08 21:21 Brian Dolbec
2016-05-08 21:21 Brian Dolbec
2016-05-08 21:21 Brian Dolbec
2016-05-08 21:21 Brian Dolbec
2016-05-08 21:21 Brian Dolbec
2016-05-08 21:21 Brian Dolbec
2016-05-08 21:21 Brian Dolbec
2016-05-05 16:06 Brian Dolbec
2016-05-05 16:02 Brian Dolbec
2016-05-04  6:24 Brian Dolbec
2016-05-04  6:09 Brian Dolbec
2016-05-04  3:45 Brian Dolbec
2016-05-03 20:58 Brian Dolbec
2016-05-03 20:58 Brian Dolbec
2016-05-03 17:43 Brian Dolbec
2016-05-03 17:43 Brian Dolbec
2016-05-03  9:33 Brian Dolbec
2016-05-03  9:33 Brian Dolbec
2016-05-03  9:33 Brian Dolbec
2016-05-03  9:33 Brian Dolbec
2016-05-03  9:33 Brian Dolbec
2016-04-25 15:32 Brian Dolbec
2016-04-21 16:54 Brian Dolbec
2016-03-15 19:00 Brian Dolbec
2016-01-06  4:21 Brian Dolbec

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1463250580.633cabfb9215633c554fd967b9875310be3718bd.dolsen@gentoo \
    --to=dolsen@gentoo.org \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.