From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id 083DC13825A for ; Sun, 15 May 2016 23:51:17 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 6CD4A141C9; Sun, 15 May 2016 23:51:16 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id 03C251418A for ; Sun, 15 May 2016 23:51:15 +0000 (UTC) Received: from oystercatcher.gentoo.org (unknown [IPv6:2a01:4f8:202:4333:225:90ff:fed9:fc84]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id 8C8A5340751 for ; Sun, 15 May 2016 23:51:14 +0000 (UTC) Received: from localhost.localdomain (localhost [127.0.0.1]) by oystercatcher.gentoo.org (Postfix) with ESMTP id 98F5B96A for ; Sun, 15 May 2016 23:51:12 +0000 (UTC) From: "Brian Dolbec" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "Brian Dolbec" Message-ID: <1463250580.633cabfb9215633c554fd967b9875310be3718bd.dolsen@gentoo> Subject: [gentoo-commits] proj/portage:master commit in: pym/repoman/modules/scan/metadata/ X-VCS-Repository: proj/portage X-VCS-Files: pym/repoman/modules/scan/metadata/pkgmetadata.py X-VCS-Directories: pym/repoman/modules/scan/metadata/ X-VCS-Committer: dolsen X-VCS-Committer-Name: Brian Dolbec X-VCS-Revision: 633cabfb9215633c554fd967b9875310be3718bd X-VCS-Branch: master Date: Sun, 15 May 2016 23:51:12 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Archives-Salt: 4f5e0d09-32f0-4f01-bc42-fff1ac0849a1 X-Archives-Hash: c76eaf11f34fb11a9f4e3292d256e0ff Message-ID: <20160515235112.pnb7RWsyxdHto5Pr7ItS7RMRRLjRlIajVYXRM1i7Icc@z> commit: 633cabfb9215633c554fd967b9875310be3718bd Author: Dirkjan Ochtman ochtman nl> AuthorDate: Tue May 3 07:18:05 2016 +0000 Commit: Brian Dolbec gentoo org> CommitDate: Sat May 14 18:29:40 2016 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=633cabfb repoman: Use lxml for parsing of metadata Note that we no longer throw a QA error for a missing XML prolog, as long as the encoding matches the default ('UTF-8'; lowercase is also allowed). Update import error message to lxml pkg. pym/repoman/modules/scan/metadata/pkgmetadata.py | 46 +++++++----------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/pym/repoman/modules/scan/metadata/pkgmetadata.py b/pym/repoman/modules/scan/metadata/pkgmetadata.py index bcddb3e..317ab56 100644 --- a/pym/repoman/modules/scan/metadata/pkgmetadata.py +++ b/pym/repoman/modules/scan/metadata/pkgmetadata.py @@ -7,14 +7,14 @@ import sys from itertools import chain try: - import xml.etree.ElementTree - from xml.parsers.expat import ExpatError + from lxml import etree + from lxml.etree import ParserError except (SystemExit, KeyboardInterrupt): raise except (ImportError, SystemError, RuntimeError, Exception): # broken or missing xml support # http://bugs.python.org/issue14988 - msg = ["Please enable python's \"xml\" USE flag in order to use repoman."] + msg = ["Please emerge dev-python/lxml in order to use repoman."] from portage.output import EOutput out = EOutput() for line in msg: @@ -26,12 +26,11 @@ from repoman._portage import portage from repoman.metadata import metadata_dtd_uri from repoman.checks.herds.herdbase import get_herd_base from repoman.checks.herds.metadata import check_metadata, UnknownHerdsError -from repoman._xml import _XMLParser, _MetadataTreeBuilder, XmlLint +from repoman._xml import XmlLint from repoman.modules.scan.scanbase import ScanBase from portage.exception import InvalidAtom from portage import os -from portage import _encodings, _unicode_encode from portage import exception from portage.dep import Atom @@ -145,50 +144,31 @@ class PkgMetadata(ScanBase, USEFlagChecks): # metadata.xml parse check metadata_bad = False - xml_info = {} - xml_parser = _XMLParser(xml_info, target=_MetadataTreeBuilder()) # read metadata.xml into memory try: - _metadata_xml = xml.etree.ElementTree.parse( - _unicode_encode( - os.path.join(checkdir, "metadata.xml"), - encoding=_encodings['fs'], errors='strict'), - parser=xml_parser) - except (ExpatError, SyntaxError, EnvironmentError) as e: + _metadata_xml = etree.parse(os.path.join(checkdir, 'metadata.xml')) + except (ParserError, SyntaxError, EnvironmentError) as e: metadata_bad = True self.qatracker.add_error("metadata.bad", "%s/metadata.xml: %s" % (xpkg, e)) del e self.muselist = frozenset(self.musedict) return False - if "XML_DECLARATION" not in xml_info: + xml_encoding = _metadata_xml.docinfo.encoding + if xml_encoding.upper() != metadata_xml_encoding: self.qatracker.add_error( "metadata.bad", "%s/metadata.xml: " - "xml declaration is missing on first line, " - "should be '%s'" % (xpkg, metadata_xml_declaration)) - else: - xml_version, xml_encoding, xml_standalone = \ - xml_info["XML_DECLARATION"] - if xml_encoding is None or \ - xml_encoding.upper() != metadata_xml_encoding: - if xml_encoding is None: - encoding_problem = "but it is undefined" - else: - encoding_problem = "not '%s'" % xml_encoding - self.qatracker.add_error( - "metadata.bad", "%s/metadata.xml: " - "xml declaration encoding should be '%s', %s" % - (xpkg, metadata_xml_encoding, encoding_problem)) + "xml declaration encoding should be '%s', not '%s'" % + (xpkg, metadata_xml_encoding, xml_encoding)) - if "DOCTYPE" not in xml_info: + if not _metadata_xml.docinfo: metadata_bad = True self.qatracker.add_error( "metadata.bad", "%s/metadata.xml: %s" % (xpkg, "DOCTYPE is missing")) else: - doctype_name, doctype_system, doctype_pubid = \ - xml_info["DOCTYPE"] + doctype_system = _metadata_xml.docinfo.system_url if doctype_system != metadata_dtd_uri: if doctype_system is None: system_problem = "but it is undefined" @@ -198,7 +178,7 @@ class PkgMetadata(ScanBase, USEFlagChecks): "metadata.bad", "%s/metadata.xml: " "DOCTYPE: SYSTEM should refer to '%s', %s" % (xpkg, metadata_dtd_uri, system_problem)) - + doctype_name = _metadata_xml.docinfo.doctype.split(' ')[1] if doctype_name != metadata_doctype_name: self.qatracker.add_error( "metadata.bad", "%s/metadata.xml: "