From: "Brian Dolbec" <dolsen@gentoo.org> To: gentoo-commits@lists.gentoo.org Subject: [gentoo-commits] proj/portage:repoman commit in: pym/repoman/modules/scan/metadata/, pym/repoman/ Date: Sat, 14 May 2016 18:33:56 +0000 (UTC) [thread overview] Message-ID: <1463250580.2d2b99917ed054bea083a0e47f30aac34f0fefdd.dolsen@gentoo> (raw) commit: 2d2b99917ed054bea083a0e47f30aac34f0fefdd Author: Dirkjan Ochtman <dirkjan <AT> ochtman <DOT> nl> AuthorDate: Tue May 3 09:01:29 2016 +0000 Commit: Brian Dolbec <dolsen <AT> gentoo <DOT> org> CommitDate: Sat May 14 18:29:40 2016 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=2d2b9991 repoman: Migrate from XmlLint to etree.XMLSchema for validation Remove No longer used repoman._xml module This change based on work by Dirkjan Ochtman <djc <AT> gentoo.org> Updated the change from XML.DTD to XMLSchema. Additionally: Move the metadata.xsd path determination code to metadata.py. Add the DISTDIR backup location and fetching of the file if missing or stale. 
pym/repoman/_xml.py | 105 ----------------------- pym/repoman/metadata.py | 21 +++++ pym/repoman/modules/scan/metadata/pkgmetadata.py | 9 +- pym/repoman/scanner.py | 10 +-- 4 files changed, 26 insertions(+), 119 deletions(-) diff --git a/pym/repoman/_xml.py b/pym/repoman/_xml.py deleted file mode 100644 index 33a536a..0000000 --- a/pym/repoman/_xml.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding:utf-8 -*- - -from __future__ import print_function, unicode_literals - -import sys -import xml - -# import our initialized portage instance -from repoman._portage import portage - -from portage import os -from portage.output import red -from portage.process import find_binary - -from repoman.metadata import fetch_metadata_xsd -from repoman._subprocess import repoman_getstatusoutput - - -class _XMLParser(xml.etree.ElementTree.XMLParser): - - def __init__(self, data, **kwargs): - xml.etree.ElementTree.XMLParser.__init__(self, **kwargs) - self._portage_data = data - if hasattr(self, 'parser'): - self._base_XmlDeclHandler = self.parser.XmlDeclHandler - self.parser.XmlDeclHandler = self._portage_XmlDeclHandler - self._base_StartDoctypeDeclHandler = \ - self.parser.StartDoctypeDeclHandler - self.parser.StartDoctypeDeclHandler = \ - self._portage_StartDoctypeDeclHandler - - def _portage_XmlDeclHandler(self, version, encoding, standalone): - if self._base_XmlDeclHandler is not None: - self._base_XmlDeclHandler(version, encoding, standalone) - self._portage_data["XML_DECLARATION"] = (version, encoding, standalone) - - def _portage_StartDoctypeDeclHandler( - self, doctypeName, systemId, publicId, has_internal_subset): - if self._base_StartDoctypeDeclHandler is not None: - self._base_StartDoctypeDeclHandler( - doctypeName, systemId, publicId, has_internal_subset) - self._portage_data["DOCTYPE"] = (doctypeName, systemId, publicId) - - -class _MetadataTreeBuilder(xml.etree.ElementTree.TreeBuilder): - """ - Implements doctype() as required to avoid deprecation warnings with - 
>=python-2.7. - """ - def doctype(self, name, pubid, system): - pass - - -class XmlLint(object): - - def __init__(self, options, repoman_settings, metadata_xsd=None): - self.metadata_xsd = (metadata_xsd or - os.path.join(repoman_settings["DISTDIR"], 'metadata.xsd')) - self.options = options - self.repoman_settings = repoman_settings - self._is_capable = metadata_xsd is not None - self.binary = None - self._check_capable() - - def _check_capable(self): - if self.options.mode == "manifest": - return - self.binary = find_binary('xmllint') - if not self.binary: - print(red("!!! xmllint not found. Can't check metadata.xml.\n")) - elif not self._is_capable: - if not fetch_metadata_xsd(self.metadata_xsd, self.repoman_settings): - sys.exit(1) - # this can be problematic if xmllint changes their output - self._is_capable = True - - @property - def capable(self): - return self._is_capable - - def check(self, checkdir, repolevel): - '''Runs checks on the package metadata.xml file - - @param checkdir: string, path - @param repolevel: integer - @return boolean, False == bad metadata - ''' - if not self.capable: - if self.options.xml_parse or repolevel == 3: - print("%s sorry, xmllint is needed. failing\n" % red("!!!")) - sys.exit(1) - return True - # xmlint can produce garbage output even on success, so only dump - # the ouput when it fails. - st, out = repoman_getstatusoutput( - self.binary + " --nonet --noout --schema %s %s" % ( - portage._shell_quote(self.metadata_xsd), - portage._shell_quote( - os.path.join(checkdir, "metadata.xml")))) - if st != os.EX_OK: - print(red("!!!") + " metadata.xml is invalid:") - for z in out.splitlines(): - print(red("!!! 
") + z) - return False - return True diff --git a/pym/repoman/metadata.py b/pym/repoman/metadata.py index 7c64c8e..a9ad3e8 100644 --- a/pym/repoman/metadata.py +++ b/pym/repoman/metadata.py @@ -99,3 +99,24 @@ def fetch_metadata_xsd(metadata_xsd, repoman_settings): pass return True + + +def get_metadata_xsd(repo_settings): + '''Locate and or fetch the metadata.xsd file + + @param repo_settings: RepoSettings instance + @returns: path to the metadata.xsd file + ''' + metadata_xsd = None + for path in reversed(repo_settings.repo_config.eclass_db.porttrees): + path = os.path.join(path, 'metadata/xml-schema/metadata.xsd') + if os.path.exists(path): + metadata_xsd = path + break + if metadata_xsd is None: + metadata_xsd = os.path.join( + repo_settings.repoman_settings["DISTDIR"], 'metadata.xsd' + ) + + fetch_metadata_xsd(metadata_xsd, repo_settings.repoman_settings) + return metadata_xsd diff --git a/pym/repoman/modules/scan/metadata/pkgmetadata.py b/pym/repoman/modules/scan/metadata/pkgmetadata.py index 317ab56..3ca7897 100644 --- a/pym/repoman/modules/scan/metadata/pkgmetadata.py +++ b/pym/repoman/modules/scan/metadata/pkgmetadata.py @@ -26,7 +26,6 @@ from repoman._portage import portage from repoman.metadata import metadata_dtd_uri from repoman.checks.herds.herdbase import get_herd_base from repoman.checks.herds.metadata import check_metadata, UnknownHerdsError -from repoman._xml import XmlLint from repoman.modules.scan.scanbase import ScanBase from portage.exception import InvalidAtom @@ -110,13 +109,11 @@ class PkgMetadata(ScanBase, USEFlagChecks): repo_settings = kwargs.get('repo_settings') self.qatracker = kwargs.get('qatracker') self.options = kwargs.get('options') - metadata_xsd = kwargs.get('metadata_xsd') + self.metadata_xsd = kwargs.get('metadata_xsd') self.globalUseFlags = kwargs.get('uselist') self.repoman_settings = repo_settings.repoman_settings self.musedict = {} self.muselist = set() - self.xmllint = XmlLint(self.options, self.repoman_settings, - 
metadata_xsd=metadata_xsd) def check(self, **kwargs): '''Performs the checks on the metadata.xml for the package @@ -129,7 +126,6 @@ class PkgMetadata(ScanBase, USEFlagChecks): xpkg = kwargs.get('xpkg') checkdir = kwargs.get('checkdir') checkdirlist = kwargs.get('checkdirlist').get() - repolevel = kwargs.get('repolevel') self.musedict = {} if self.options.mode in ['manifest']: @@ -221,7 +217,8 @@ class PkgMetadata(ScanBase, USEFlagChecks): # Only carry out if in package directory or check forced if not metadata_bad: - if not self.xmllint.check(checkdir, repolevel): + validator = etree.XMLSchema(file=self.metadata_xsd) + if not validator.validate(_metadata_xml): self.qatracker.add_error("metadata.bad", xpkg + "/metadata.xml") del metadata_bad self.muselist = frozenset(self.musedict) diff --git a/pym/repoman/scanner.py b/pym/repoman/scanner.py index fd07209..48d9001 100644 --- a/pym/repoman/scanner.py +++ b/pym/repoman/scanner.py @@ -10,6 +10,7 @@ from portage import normalize_path from portage import os from portage.output import green from portage.util.futures.extendedfutures import ExtendedFuture +from repoman.metadata import get_metadata_xsd from repoman.modules.commit import repochecks from repoman.profile import check_profiles, dev_profile_keywords, setup_profile from repoman.repos import repo_metadata @@ -56,13 +57,6 @@ class Scanner(object): portage.util.stack_lists([self.categories], incremental=1)) self.categories = self.repo_settings.repoman_settings.categories - metadata_xsd = None - for path in reversed(self.repo_settings.repo_config.eclass_db.porttrees): - path = os.path.join(path, 'metadata/xml-schema/metadata.xsd') - if os.path.exists(path): - metadata_xsd = path - break - self.portdb = repo_settings.portdb self.portdb.settings = self.repo_settings.repoman_settings # We really only need to cache the metadata that's necessary for visibility @@ -187,7 +181,7 @@ class Scanner(object): "qatracker": self.qatracker, "vcs_settings": self.vcs_settings, 
"options": self.options, - "metadata_xsd": metadata_xsd, + "metadata_xsd": get_metadata_xsd(self.repo_settings), "uselist": uselist, "checks": checks, "repo_metadata": self.repo_metadata,
WARNING: multiple messages have this Message-ID (diff)
From: "Brian Dolbec" <dolsen@gentoo.org> To: gentoo-commits@lists.gentoo.org Subject: [gentoo-commits] proj/portage:master commit in: pym/repoman/modules/scan/metadata/, pym/repoman/ Date: Sun, 15 May 2016 23:51:12 +0000 (UTC) [thread overview] Message-ID: <1463250580.2d2b99917ed054bea083a0e47f30aac34f0fefdd.dolsen@gentoo> (raw) Message-ID: <20160515235112.zcxPPmRMjwCGcrkGLS7GQ5YoRfWA3KN0mp7xZBgdMXM@z> (raw) commit: 2d2b99917ed054bea083a0e47f30aac34f0fefdd Author: Dirkjan Ochtman <dirkjan <AT> ochtman <DOT> nl> AuthorDate: Tue May 3 09:01:29 2016 +0000 Commit: Brian Dolbec <dolsen <AT> gentoo <DOT> org> CommitDate: Sat May 14 18:29:40 2016 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=2d2b9991 repoman: Migrate from XmlLint to etree.XMLSchema for validation Remove No longer used repoman._xml module This change based on work by Dirkjan Ochtman <djc <AT> gentoo.org> Updated the change from XML.DTD to XMLSchema. Additionally: Move the metadata.xsd path determination code to metadata.py. Add the DISTDIR backup location and fetching of the file if missing or stale. 
pym/repoman/_xml.py | 105 ----------------------- pym/repoman/metadata.py | 21 +++++ pym/repoman/modules/scan/metadata/pkgmetadata.py | 9 +- pym/repoman/scanner.py | 10 +-- 4 files changed, 26 insertions(+), 119 deletions(-) diff --git a/pym/repoman/_xml.py b/pym/repoman/_xml.py deleted file mode 100644 index 33a536a..0000000 --- a/pym/repoman/_xml.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding:utf-8 -*- - -from __future__ import print_function, unicode_literals - -import sys -import xml - -# import our initialized portage instance -from repoman._portage import portage - -from portage import os -from portage.output import red -from portage.process import find_binary - -from repoman.metadata import fetch_metadata_xsd -from repoman._subprocess import repoman_getstatusoutput - - -class _XMLParser(xml.etree.ElementTree.XMLParser): - - def __init__(self, data, **kwargs): - xml.etree.ElementTree.XMLParser.__init__(self, **kwargs) - self._portage_data = data - if hasattr(self, 'parser'): - self._base_XmlDeclHandler = self.parser.XmlDeclHandler - self.parser.XmlDeclHandler = self._portage_XmlDeclHandler - self._base_StartDoctypeDeclHandler = \ - self.parser.StartDoctypeDeclHandler - self.parser.StartDoctypeDeclHandler = \ - self._portage_StartDoctypeDeclHandler - - def _portage_XmlDeclHandler(self, version, encoding, standalone): - if self._base_XmlDeclHandler is not None: - self._base_XmlDeclHandler(version, encoding, standalone) - self._portage_data["XML_DECLARATION"] = (version, encoding, standalone) - - def _portage_StartDoctypeDeclHandler( - self, doctypeName, systemId, publicId, has_internal_subset): - if self._base_StartDoctypeDeclHandler is not None: - self._base_StartDoctypeDeclHandler( - doctypeName, systemId, publicId, has_internal_subset) - self._portage_data["DOCTYPE"] = (doctypeName, systemId, publicId) - - -class _MetadataTreeBuilder(xml.etree.ElementTree.TreeBuilder): - """ - Implements doctype() as required to avoid deprecation warnings with - 
>=python-2.7. - """ - def doctype(self, name, pubid, system): - pass - - -class XmlLint(object): - - def __init__(self, options, repoman_settings, metadata_xsd=None): - self.metadata_xsd = (metadata_xsd or - os.path.join(repoman_settings["DISTDIR"], 'metadata.xsd')) - self.options = options - self.repoman_settings = repoman_settings - self._is_capable = metadata_xsd is not None - self.binary = None - self._check_capable() - - def _check_capable(self): - if self.options.mode == "manifest": - return - self.binary = find_binary('xmllint') - if not self.binary: - print(red("!!! xmllint not found. Can't check metadata.xml.\n")) - elif not self._is_capable: - if not fetch_metadata_xsd(self.metadata_xsd, self.repoman_settings): - sys.exit(1) - # this can be problematic if xmllint changes their output - self._is_capable = True - - @property - def capable(self): - return self._is_capable - - def check(self, checkdir, repolevel): - '''Runs checks on the package metadata.xml file - - @param checkdir: string, path - @param repolevel: integer - @return boolean, False == bad metadata - ''' - if not self.capable: - if self.options.xml_parse or repolevel == 3: - print("%s sorry, xmllint is needed. failing\n" % red("!!!")) - sys.exit(1) - return True - # xmlint can produce garbage output even on success, so only dump - # the ouput when it fails. - st, out = repoman_getstatusoutput( - self.binary + " --nonet --noout --schema %s %s" % ( - portage._shell_quote(self.metadata_xsd), - portage._shell_quote( - os.path.join(checkdir, "metadata.xml")))) - if st != os.EX_OK: - print(red("!!!") + " metadata.xml is invalid:") - for z in out.splitlines(): - print(red("!!! 
") + z) - return False - return True diff --git a/pym/repoman/metadata.py b/pym/repoman/metadata.py index 7c64c8e..a9ad3e8 100644 --- a/pym/repoman/metadata.py +++ b/pym/repoman/metadata.py @@ -99,3 +99,24 @@ def fetch_metadata_xsd(metadata_xsd, repoman_settings): pass return True + + +def get_metadata_xsd(repo_settings): + '''Locate and or fetch the metadata.xsd file + + @param repo_settings: RepoSettings instance + @returns: path to the metadata.xsd file + ''' + metadata_xsd = None + for path in reversed(repo_settings.repo_config.eclass_db.porttrees): + path = os.path.join(path, 'metadata/xml-schema/metadata.xsd') + if os.path.exists(path): + metadata_xsd = path + break + if metadata_xsd is None: + metadata_xsd = os.path.join( + repo_settings.repoman_settings["DISTDIR"], 'metadata.xsd' + ) + + fetch_metadata_xsd(metadata_xsd, repo_settings.repoman_settings) + return metadata_xsd diff --git a/pym/repoman/modules/scan/metadata/pkgmetadata.py b/pym/repoman/modules/scan/metadata/pkgmetadata.py index 317ab56..3ca7897 100644 --- a/pym/repoman/modules/scan/metadata/pkgmetadata.py +++ b/pym/repoman/modules/scan/metadata/pkgmetadata.py @@ -26,7 +26,6 @@ from repoman._portage import portage from repoman.metadata import metadata_dtd_uri from repoman.checks.herds.herdbase import get_herd_base from repoman.checks.herds.metadata import check_metadata, UnknownHerdsError -from repoman._xml import XmlLint from repoman.modules.scan.scanbase import ScanBase from portage.exception import InvalidAtom @@ -110,13 +109,11 @@ class PkgMetadata(ScanBase, USEFlagChecks): repo_settings = kwargs.get('repo_settings') self.qatracker = kwargs.get('qatracker') self.options = kwargs.get('options') - metadata_xsd = kwargs.get('metadata_xsd') + self.metadata_xsd = kwargs.get('metadata_xsd') self.globalUseFlags = kwargs.get('uselist') self.repoman_settings = repo_settings.repoman_settings self.musedict = {} self.muselist = set() - self.xmllint = XmlLint(self.options, self.repoman_settings, - 
metadata_xsd=metadata_xsd) def check(self, **kwargs): '''Performs the checks on the metadata.xml for the package @@ -129,7 +126,6 @@ class PkgMetadata(ScanBase, USEFlagChecks): xpkg = kwargs.get('xpkg') checkdir = kwargs.get('checkdir') checkdirlist = kwargs.get('checkdirlist').get() - repolevel = kwargs.get('repolevel') self.musedict = {} if self.options.mode in ['manifest']: @@ -221,7 +217,8 @@ class PkgMetadata(ScanBase, USEFlagChecks): # Only carry out if in package directory or check forced if not metadata_bad: - if not self.xmllint.check(checkdir, repolevel): + validator = etree.XMLSchema(file=self.metadata_xsd) + if not validator.validate(_metadata_xml): self.qatracker.add_error("metadata.bad", xpkg + "/metadata.xml") del metadata_bad self.muselist = frozenset(self.musedict) diff --git a/pym/repoman/scanner.py b/pym/repoman/scanner.py index fd07209..48d9001 100644 --- a/pym/repoman/scanner.py +++ b/pym/repoman/scanner.py @@ -10,6 +10,7 @@ from portage import normalize_path from portage import os from portage.output import green from portage.util.futures.extendedfutures import ExtendedFuture +from repoman.metadata import get_metadata_xsd from repoman.modules.commit import repochecks from repoman.profile import check_profiles, dev_profile_keywords, setup_profile from repoman.repos import repo_metadata @@ -56,13 +57,6 @@ class Scanner(object): portage.util.stack_lists([self.categories], incremental=1)) self.categories = self.repo_settings.repoman_settings.categories - metadata_xsd = None - for path in reversed(self.repo_settings.repo_config.eclass_db.porttrees): - path = os.path.join(path, 'metadata/xml-schema/metadata.xsd') - if os.path.exists(path): - metadata_xsd = path - break - self.portdb = repo_settings.portdb self.portdb.settings = self.repo_settings.repoman_settings # We really only need to cache the metadata that's necessary for visibility @@ -187,7 +181,7 @@ class Scanner(object): "qatracker": self.qatracker, "vcs_settings": self.vcs_settings, 
"options": self.options, - "metadata_xsd": metadata_xsd, + "metadata_xsd": get_metadata_xsd(self.repo_settings), "uselist": uselist, "checks": checks, "repo_metadata": self.repo_metadata,
next reply other threads:[~2016-05-14 18:34 UTC|newest] Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top 2016-05-14 18:33 Brian Dolbec [this message] 2016-05-15 23:51 ` [gentoo-commits] proj/portage:master commit in: pym/repoman/modules/scan/metadata/, pym/repoman/ Brian Dolbec -- strict thread matches above, loose matches on Subject: below -- 2016-05-14 18:33 [gentoo-commits] proj/portage:repoman " Brian Dolbec 2016-05-08 21:21 Brian Dolbec 2016-05-08 21:21 Brian Dolbec 2016-05-03 9:33 Brian Dolbec 2016-04-25 15:07 Brian Dolbec 2016-03-11 0:41 Brian Dolbec 2016-03-07 21:53 Brian Dolbec 2016-01-31 20:03 Brian Dolbec 2016-01-31 20:03 Brian Dolbec 2016-01-31 20:03 Brian Dolbec 2016-01-30 8:00 Brian Dolbec 2016-01-30 8:00 Brian Dolbec 2016-01-30 8:00 Brian Dolbec 2016-01-30 6:58 Brian Dolbec 2016-01-30 6:58 Brian Dolbec 2016-01-29 5:01 Brian Dolbec 2016-01-27 23:15 Brian Dolbec 2016-01-27 23:15 Brian Dolbec 2016-01-27 23:15 Brian Dolbec 2016-01-23 1:42 Brian Dolbec 2016-01-23 1:42 Brian Dolbec 2016-01-23 1:42 Brian Dolbec 2016-01-22 20:55 Brian Dolbec 2016-01-21 19:42 Brian Dolbec 2016-01-21 19:42 Brian Dolbec 2016-01-21 18:30 Brian Dolbec 2016-01-21 18:30 Brian Dolbec 2016-01-21 18:30 Brian Dolbec 2016-01-21 18:30 Brian Dolbec 2016-01-18 19:23 Brian Dolbec 2016-01-11 8:01 Brian Dolbec 2016-01-11 6:31 Brian Dolbec 2016-01-11 6:31 Brian Dolbec 2016-01-11 6:31 Brian Dolbec 2016-01-10 20:17 Brian Dolbec 2016-01-10 3:26 Brian Dolbec 2016-01-10 3:25 Brian Dolbec 2016-01-06 4:21 Brian Dolbec 2016-01-06 4:21 Brian Dolbec
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1463250580.2d2b99917ed054bea083a0e47f30aac34f0fefdd.dolsen@gentoo \ --to=dolsen@gentoo.org \ --cc=gentoo-commits@lists.gentoo.org \ --cc=gentoo-dev@lists.gentoo.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.