* [gentoo-portage-dev] [patch] repoman: metadata.xml checks re-write
@ 2016-05-04 0:31 Brian Dolbec
0 siblings, 0 replies; only message in thread
From: Brian Dolbec @ 2016-05-04 0:31 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1: Type: text/plain, Size: 1391 bytes --]
I've rebased djc's re-write work [1] onto the current master.
I've made a few small changes along the way, like removing unused
variables and fixing imports, but tried to stay with his original commits
for the most part. I did, however, stop the herds checks from
being run. I've left the code in place in case it can be
recycled for a projects check instead.
I then continued to rewrite the code so that it takes advantage
of the XMLSchema error reporting. Neither the current code nor the
release code outputs all the errors in a metadata.xml file.
Just compare runs on the gen-b0rk/xml-test/missing-attributes pkg; you'll
see that they stop after a single error. This new code should output any
and all errors that the in-tree XMLSchema file is capable of detecting.
There are currently 12 commits in the repoman branch [2] of the portage
repo. Gokturk is still working on some more test ebuilds/errors and code to
add to it, but it would be good to get this code reviewed up to
this point. Even without additional tests or checks added, this code
is probably better than the current code for a repoman release.
I've attached a git diff of the repoman branch against master; individual
commits can be looked at via your favourite means.
[1] https://github.com/djc/portage/commits/repoman-metadata
[2] https://gitweb.gentoo.org/proj/portage.git/
--
Brian Dolbec <dolsen>
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: metadada.xml-checks-rewrite.diff --]
[-- Type: text/x-patch, Size: 19250 bytes --]
diff --git a/pym/repoman/_xml.py b/pym/repoman/_xml.py
deleted file mode 100644
index 33a536a..0000000
--- a/pym/repoman/_xml.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# -*- coding:utf-8 -*-
-
-from __future__ import print_function, unicode_literals
-
-import sys
-import xml
-
-# import our initialized portage instance
-from repoman._portage import portage
-
-from portage import os
-from portage.output import red
-from portage.process import find_binary
-
-from repoman.metadata import fetch_metadata_xsd
-from repoman._subprocess import repoman_getstatusoutput
-
-
-class _XMLParser(xml.etree.ElementTree.XMLParser):
-
- def __init__(self, data, **kwargs):
- xml.etree.ElementTree.XMLParser.__init__(self, **kwargs)
- self._portage_data = data
- if hasattr(self, 'parser'):
- self._base_XmlDeclHandler = self.parser.XmlDeclHandler
- self.parser.XmlDeclHandler = self._portage_XmlDeclHandler
- self._base_StartDoctypeDeclHandler = \
- self.parser.StartDoctypeDeclHandler
- self.parser.StartDoctypeDeclHandler = \
- self._portage_StartDoctypeDeclHandler
-
- def _portage_XmlDeclHandler(self, version, encoding, standalone):
- if self._base_XmlDeclHandler is not None:
- self._base_XmlDeclHandler(version, encoding, standalone)
- self._portage_data["XML_DECLARATION"] = (version, encoding, standalone)
-
- def _portage_StartDoctypeDeclHandler(
- self, doctypeName, systemId, publicId, has_internal_subset):
- if self._base_StartDoctypeDeclHandler is not None:
- self._base_StartDoctypeDeclHandler(
- doctypeName, systemId, publicId, has_internal_subset)
- self._portage_data["DOCTYPE"] = (doctypeName, systemId, publicId)
-
-
-class _MetadataTreeBuilder(xml.etree.ElementTree.TreeBuilder):
- """
- Implements doctype() as required to avoid deprecation warnings with
- >=python-2.7.
- """
- def doctype(self, name, pubid, system):
- pass
-
-
-class XmlLint(object):
-
- def __init__(self, options, repoman_settings, metadata_xsd=None):
- self.metadata_xsd = (metadata_xsd or
- os.path.join(repoman_settings["DISTDIR"], 'metadata.xsd'))
- self.options = options
- self.repoman_settings = repoman_settings
- self._is_capable = metadata_xsd is not None
- self.binary = None
- self._check_capable()
-
- def _check_capable(self):
- if self.options.mode == "manifest":
- return
- self.binary = find_binary('xmllint')
- if not self.binary:
- print(red("!!! xmllint not found. Can't check metadata.xml.\n"))
- elif not self._is_capable:
- if not fetch_metadata_xsd(self.metadata_xsd, self.repoman_settings):
- sys.exit(1)
- # this can be problematic if xmllint changes their output
- self._is_capable = True
-
- @property
- def capable(self):
- return self._is_capable
-
- def check(self, checkdir, repolevel):
- '''Runs checks on the package metadata.xml file
-
- @param checkdir: string, path
- @param repolevel: integer
- @return boolean, False == bad metadata
- '''
- if not self.capable:
- if self.options.xml_parse or repolevel == 3:
- print("%s sorry, xmllint is needed. failing\n" % red("!!!"))
- sys.exit(1)
- return True
- # xmlint can produce garbage output even on success, so only dump
- # the ouput when it fails.
- st, out = repoman_getstatusoutput(
- self.binary + " --nonet --noout --schema %s %s" % (
- portage._shell_quote(self.metadata_xsd),
- portage._shell_quote(
- os.path.join(checkdir, "metadata.xml"))))
- if st != os.EX_OK:
- print(red("!!!") + " metadata.xml is invalid:")
- for z in out.splitlines():
- print(red("!!! ") + z)
- return False
- return True
diff --git a/pym/repoman/metadata.py b/pym/repoman/metadata.py
index 7a514dc..a9ad3e8 100644
--- a/pym/repoman/metadata.py
+++ b/pym/repoman/metadata.py
@@ -17,7 +17,6 @@ except ImportError:
# import our initialized portage instance
from repoman._portage import portage
-from portage import exception
from portage import os
from portage import shutil
from portage.output import green
@@ -28,65 +27,12 @@ if sys.hexversion >= 0x3000000:
if sys.hexversion >= 0x3000000:
basestring = str
-metadata_xml_encoding = 'UTF-8'
-metadata_xml_declaration = '<?xml version="1.0" encoding="%s"?>' \
- % (metadata_xml_encoding,)
-metadata_doctype_name = 'pkgmetadata'
metadata_dtd_uri = 'http://www.gentoo.org/dtd/metadata.dtd'
metadata_xsd_uri = 'http://www.gentoo.org/xml-schema/metadata.xsd'
# force refetch if the local copy creation time is older than this
metadata_xsd_ctime_interval = 60 * 60 * 24 * 7 # 7 days
-def parse_metadata_use(xml_tree):
- """
- Records are wrapped in XML as per GLEP 56
- returns a dict with keys constisting of USE flag names and values
- containing their respective descriptions
- """
- uselist = {}
-
- usetags = xml_tree.findall("use")
- if not usetags:
- return uselist
-
- # It's possible to have multiple 'use' elements.
- for usetag in usetags:
- flags = usetag.findall("flag")
- if not flags:
- # DTD allows use elements containing no flag elements.
- continue
-
- for flag in flags:
- pkg_flag = flag.get("name")
- if pkg_flag is None:
- raise exception.ParseError("missing 'name' attribute for 'flag' tag")
- flag_restrict = flag.get("restrict")
-
- # emulate the Element.itertext() method from python-2.7
- inner_text = []
- stack = []
- stack.append(flag)
- while stack:
- obj = stack.pop()
- if isinstance(obj, basestring):
- inner_text.append(obj)
- continue
- if isinstance(obj.text, basestring):
- inner_text.append(obj.text)
- if isinstance(obj.tail, basestring):
- stack.append(obj.tail)
- stack.extend(reversed(obj))
-
- if pkg_flag not in uselist:
- uselist[pkg_flag] = {}
-
- # (flag_restrict can be None)
- uselist[pkg_flag][flag_restrict] = " ".join("".join(inner_text).split())
-
- return uselist
-
-
def fetch_metadata_xsd(metadata_xsd, repoman_settings):
"""
Fetch metadata.xsd if it doesn't exist or the ctime is older than
@@ -153,3 +99,24 @@ def fetch_metadata_xsd(metadata_xsd, repoman_settings):
pass
return True
+
+
+def get_metadata_xsd(repo_settings):
+ '''Locate and or fetch the metadata.xsd file
+
+ @param repo_settings: RepoSettings instance
+ @returns: path to the metadata.xsd file
+ '''
+ metadata_xsd = None
+ for path in reversed(repo_settings.repo_config.eclass_db.porttrees):
+ path = os.path.join(path, 'metadata/xml-schema/metadata.xsd')
+ if os.path.exists(path):
+ metadata_xsd = path
+ break
+ if metadata_xsd is None:
+ metadata_xsd = os.path.join(
+ repo_settings.repoman_settings["DISTDIR"], 'metadata.xsd'
+ )
+
+ fetch_metadata_xsd(metadata_xsd, repo_settings.repoman_settings)
+ return metadata_xsd
diff --git a/pym/repoman/modules/scan/metadata/pkgmetadata.py b/pym/repoman/modules/scan/metadata/pkgmetadata.py
index 5c6452a..44b5edd 100644
--- a/pym/repoman/modules/scan/metadata/pkgmetadata.py
+++ b/pym/repoman/modules/scan/metadata/pkgmetadata.py
@@ -7,8 +7,8 @@ import sys
from itertools import chain
try:
- import xml.etree.ElementTree
- from xml.parsers.expat import ExpatError
+ from lxml import etree
+ from lxml.etree import ParserError
except (SystemExit, KeyboardInterrupt):
raise
except (ImportError, SystemError, RuntimeError, Exception):
@@ -23,21 +23,20 @@ except (ImportError, SystemError, RuntimeError, Exception):
# import our initialized portage instance
from repoman._portage import portage
-from repoman.metadata import (
- metadata_xml_encoding, metadata_doctype_name,
- metadata_dtd_uri, metadata_xml_declaration, parse_metadata_use)
-from repoman.checks.herds.herdbase import get_herd_base
-from repoman.checks.herds.metadata import check_metadata, UnknownHerdsError
-from repoman._xml import _XMLParser, _MetadataTreeBuilder, XmlLint
+from repoman.metadata import metadata_dtd_uri
from repoman.modules.scan.scanbase import ScanBase
from portage.exception import InvalidAtom
from portage import os
-from portage import _encodings, _unicode_encode
from portage.dep import Atom
from .use_flags import USEFlagChecks
+metadata_xml_encoding = 'UTF-8'
+metadata_xml_declaration = '<?xml version="1.0" encoding="%s"?>' \
+ % (metadata_xml_encoding,)
+metadata_doctype_name = 'pkgmetadata'
+
class PkgMetadata(ScanBase, USEFlagChecks):
'''Package metadata.xml checks'''
@@ -54,13 +53,11 @@ class PkgMetadata(ScanBase, USEFlagChecks):
repo_settings = kwargs.get('repo_settings')
self.qatracker = kwargs.get('qatracker')
self.options = kwargs.get('options')
- metadata_xsd = kwargs.get('metadata_xsd')
+ self.metadata_xsd = kwargs.get('metadata_xsd')
self.globalUseFlags = kwargs.get('uselist')
self.repoman_settings = repo_settings.repoman_settings
self.musedict = {}
self.muselist = set()
- self.xmllint = XmlLint(self.options, self.repoman_settings,
- metadata_xsd=metadata_xsd)
def check(self, **kwargs):
'''Performs the checks on the metadata.xml for the package
@@ -73,7 +70,6 @@ class PkgMetadata(ScanBase, USEFlagChecks):
xpkg = kwargs.get('xpkg')
checkdir = kwargs.get('checkdir')
checkdirlist = kwargs.get('checkdirlist').get()
- repolevel = kwargs.get('repolevel')
self.musedict = {}
if self.options.mode in ['manifest']:
@@ -83,112 +79,75 @@ class PkgMetadata(ScanBase, USEFlagChecks):
# metadata.xml file check
if "metadata.xml" not in checkdirlist:
self.qatracker.add_error("metadata.missing", xpkg + "/metadata.xml")
+ self.muselist = frozenset(self.musedict)
+ return False
+
# metadata.xml parse check
- else:
- metadata_bad = False
- xml_info = {}
- xml_parser = _XMLParser(xml_info, target=_MetadataTreeBuilder())
+ metadata_bad = False
+
+ # read metadata.xml into memory
+ try:
+ _metadata_xml = etree.parse(os.path.join(checkdir, 'metadata.xml'))
+ except (ParserError, SyntaxError, EnvironmentError) as e:
+ metadata_bad = True
+ self.qatracker.add_error("metadata.bad", "%s/metadata.xml: %s" % (xpkg, e))
+ del e
+ self.muselist = frozenset(self.musedict)
+ return False
- # read metadata.xml into memory
+ xml_encoding = _metadata_xml.docinfo.encoding
+ if xml_encoding.upper() != metadata_xml_encoding:
+ self.qatracker.add_error(
+ "metadata.bad", "%s/metadata.xml: "
+ "xml declaration encoding should be '%s', not '%s'" %
+ (xpkg, metadata_xml_encoding, xml_encoding))
+
+ if not _metadata_xml.docinfo:
+ metadata_bad = True
+ self.qatracker.add_error(
+ "metadata.bad",
+ "%s/metadata.xml: %s" % (xpkg, "DOCTYPE is missing"))
+ else:
+ doctype_system = _metadata_xml.docinfo.system_url
+ if doctype_system != metadata_dtd_uri:
+ if doctype_system is None:
+ system_problem = "but it is undefined"
+ else:
+ system_problem = "not '%s'" % doctype_system
+ self.qatracker.add_error(
+ "metadata.bad", "%s/metadata.xml: "
+ "DOCTYPE: SYSTEM should refer to '%s', %s" %
+ (xpkg, metadata_dtd_uri, system_problem))
+ doctype_name = _metadata_xml.docinfo.doctype.split(' ')[1]
+ if doctype_name != metadata_doctype_name:
+ self.qatracker.add_error(
+ "metadata.bad", "%s/metadata.xml: "
+ "DOCTYPE: name should be '%s', not '%s'" %
+ (xpkg, metadata_doctype_name, doctype_name))
+
+ # load USE flags from metadata.xml
+ self.musedict = self._parse_metadata_use(_metadata_xml, xpkg)
+ for atom in chain(*self.musedict.values()):
+ if atom is None:
+ continue
try:
- _metadata_xml = xml.etree.ElementTree.parse(
- _unicode_encode(
- os.path.join(checkdir, "metadata.xml"),
- encoding=_encodings['fs'], errors='strict'),
- parser=xml_parser)
- except (ExpatError, SyntaxError, EnvironmentError) as e:
- metadata_bad = True
- self.qatracker.add_error("metadata.bad", "%s/metadata.xml: %s" % (xpkg, e))
- del e
+ atom = Atom(atom)
+ except InvalidAtom as e:
+ self.qatracker.add_error(
+ "metadata.bad",
+ "%s/metadata.xml: Invalid atom: %s" % (xpkg, e))
else:
- if not hasattr(xml_parser, 'parser') or \
- sys.hexversion < 0x2070000 or \
- (sys.hexversion > 0x3000000 and sys.hexversion < 0x3020000):
- # doctype is not parsed with python 2.6 or 3.1
- pass
- else:
- if "XML_DECLARATION" not in xml_info:
- self.qatracker.add_error(
- "metadata.bad", "%s/metadata.xml: "
- "xml declaration is missing on first line, "
- "should be '%s'" % (xpkg, metadata_xml_declaration))
- else:
- xml_version, xml_encoding, xml_standalone = \
- xml_info["XML_DECLARATION"]
- if xml_encoding is None or \
- xml_encoding.upper() != metadata_xml_encoding:
- if xml_encoding is None:
- encoding_problem = "but it is undefined"
- else:
- encoding_problem = "not '%s'" % xml_encoding
- self.qatracker.add_error(
- "metadata.bad", "%s/metadata.xml: "
- "xml declaration encoding should be '%s', %s" %
- (xpkg, metadata_xml_encoding, encoding_problem))
-
- if "DOCTYPE" not in xml_info:
- metadata_bad = True
- self.qatracker.add_error(
- "metadata.bad",
- "%s/metadata.xml: %s" % (xpkg, "DOCTYPE is missing"))
- else:
- doctype_name, doctype_system, doctype_pubid = \
- xml_info["DOCTYPE"]
- if doctype_system != metadata_dtd_uri:
- if doctype_system is None:
- system_problem = "but it is undefined"
- else:
- system_problem = "not '%s'" % doctype_system
- self.qatracker.add_error(
- "metadata.bad", "%s/metadata.xml: "
- "DOCTYPE: SYSTEM should refer to '%s', %s" %
- (xpkg, metadata_dtd_uri, system_problem))
-
- if doctype_name != metadata_doctype_name:
- self.qatracker.add_error(
- "metadata.bad", "%s/metadata.xml: "
- "DOCTYPE: name should be '%s', not '%s'" %
- (xpkg, metadata_doctype_name, doctype_name))
-
- # load USE flags from metadata.xml
- try:
- self.musedict = parse_metadata_use(_metadata_xml)
- except portage.exception.ParseError as e:
- metadata_bad = True
+ if atom.cp != xpkg:
self.qatracker.add_error(
- "metadata.bad", "%s/metadata.xml: %s" % (xpkg, e))
- else:
- for atom in chain(*self.musedict.values()):
- if atom is None:
- continue
- try:
- atom = Atom(atom)
- except InvalidAtom as e:
- self.qatracker.add_error(
- "metadata.bad",
- "%s/metadata.xml: Invalid atom: %s" % (xpkg, e))
- else:
- if atom.cp != xpkg:
- self.qatracker.add_error(
- "metadata.bad",
- "%s/metadata.xml: Atom contains "
- "unexpected cat/pn: %s" % (xpkg, atom))
-
- # Run other metadata.xml checkers
- try:
- check_metadata(_metadata_xml, get_herd_base(
- self.repoman_settings))
- except (UnknownHerdsError, ) as e:
- metadata_bad = True
- self.qatracker.add_error(
- "metadata.bad", "%s/metadata.xml: %s" % (xpkg, e))
- del e
-
- # Only carry out if in package directory or check forced
- if not metadata_bad:
- if not self.xmllint.check(checkdir, repolevel):
- self.qatracker.add_error("metadata.bad", xpkg + "/metadata.xml")
- del metadata_bad
+ "metadata.bad",
+ "%s/metadata.xml: Atom contains "
+ "unexpected cat/pn: %s" % (xpkg, atom))
+
+ # Only carry out if in package directory or check forced
+ if not metadata_bad:
+ validator = etree.XMLSchema(file=self.metadata_xsd)
+ if not validator.validate(_metadata_xml):
+ self._add_validate_errors(xpkg, validator.error_log)
self.muselist = frozenset(self.musedict)
return False
@@ -211,6 +170,59 @@ class PkgMetadata(ScanBase, USEFlagChecks):
% (xpkg, myflag))
return False
+ def _parse_metadata_use(self, xml_tree, xpkg):
+ """
+ Records are wrapped in XML as per GLEP 56
+ returns a dict with keys constisting of USE flag names and values
+ containing their respective descriptions
+ """
+ uselist = {}
+
+ usetags = xml_tree.findall("use")
+ if not usetags:
+ return uselist
+
+ # It's possible to have multiple 'use' elements.
+ for usetag in usetags:
+ flags = usetag.findall("flag")
+ if not flags:
+ # DTD allows use elements containing no flag elements.
+ continue
+
+ for flag in flags:
+ pkg_flag = flag.get("name")
+ if pkg_flag is not None:
+ flag_restrict = flag.get("restrict")
+
+ # emulate the Element.itertext() method from python-2.7
+ inner_text = []
+ stack = []
+ stack.append(flag)
+ while stack:
+ obj = stack.pop()
+ if isinstance(obj, basestring):
+ inner_text.append(obj)
+ continue
+ if isinstance(obj.text, basestring):
+ inner_text.append(obj.text)
+ if isinstance(obj.tail, basestring):
+ stack.append(obj.tail)
+ stack.extend(reversed(obj))
+
+ if flag.get("name") not in uselist:
+ uselist[flag.get("name")] = {}
+
+ # (flag_restrict can be None)
+ uselist[flag.get("name")][flag_restrict] = " ".join("".join(inner_text).split())
+ return uselist
+
+ def _add_validate_errors(self, xpkg, log):
+ for error in log:
+ self.qatracker.add_error(
+ "metadata.bad",
+ "%s/metadata.xml: line: %s, %s"
+ % (xpkg, error.line, error.message))
+
@property
def runInPkgs(self):
'''Package level scans'''
diff --git a/pym/repoman/scanner.py b/pym/repoman/scanner.py
index fd07209..48d9001 100644
--- a/pym/repoman/scanner.py
+++ b/pym/repoman/scanner.py
@@ -10,6 +10,7 @@ from portage import normalize_path
from portage import os
from portage.output import green
from portage.util.futures.extendedfutures import ExtendedFuture
+from repoman.metadata import get_metadata_xsd
from repoman.modules.commit import repochecks
from repoman.profile import check_profiles, dev_profile_keywords, setup_profile
from repoman.repos import repo_metadata
@@ -56,13 +57,6 @@ class Scanner(object):
portage.util.stack_lists([self.categories], incremental=1))
self.categories = self.repo_settings.repoman_settings.categories
- metadata_xsd = None
- for path in reversed(self.repo_settings.repo_config.eclass_db.porttrees):
- path = os.path.join(path, 'metadata/xml-schema/metadata.xsd')
- if os.path.exists(path):
- metadata_xsd = path
- break
-
self.portdb = repo_settings.portdb
self.portdb.settings = self.repo_settings.repoman_settings
# We really only need to cache the metadata that's necessary for visibility
@@ -187,7 +181,7 @@ class Scanner(object):
"qatracker": self.qatracker,
"vcs_settings": self.vcs_settings,
"options": self.options,
- "metadata_xsd": metadata_xsd,
+ "metadata_xsd": get_metadata_xsd(self.repo_settings),
"uselist": uselist,
"checks": checks,
"repo_metadata": self.repo_metadata,
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2016-05-04 0:32 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-05-04 0:31 [gentoo-portage-dev] [patch] repoman: metadata.xml checks re-write Brian Dolbec
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox