public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Sam James" <sam@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] repo/gentoo:master commit in: dev-libs/libxml2/, dev-libs/libxml2/files/
Date: Thu, 20 May 2021 01:46:59 +0000 (UTC)	[thread overview]
Message-ID: <1621475171.6d220e09b25048d28d6598f8a6ffb62f0fbe92b4.sam@gentoo> (raw)

commit:     6d220e09b25048d28d6598f8a6ffb62f0fbe92b4
Author:     Sam James <sam <AT> gentoo <DOT> org>
AuthorDate: Thu May 20 01:33:39 2021 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Thu May 20 01:46:11 2021 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=6d220e09

dev-libs/libxml2: include lxml compatibility patch

Bug: https://bugs.gentoo.org/790737
Signed-off-by: Sam James <sam <AT> gentoo.org>

 .../libxml2-2.9.12-fix-lxml-compatibility.patch    | 214 ++++++++++++++++++
 dev-libs/libxml2/libxml2-2.9.12-r1.ebuild          | 245 +++++++++++++++++++++
 2 files changed, 459 insertions(+)

diff --git a/dev-libs/libxml2/files/libxml2-2.9.12-fix-lxml-compatibility.patch b/dev-libs/libxml2/files/libxml2-2.9.12-fix-lxml-compatibility.patch
new file mode 100644
index 00000000000..844266038d3
--- /dev/null
+++ b/dev-libs/libxml2/files/libxml2-2.9.12-fix-lxml-compatibility.patch
@@ -0,0 +1,214 @@
+https://gitlab.gnome.org/nwellnhof/libxml2/-/commit/7955b0d6fbbe49392ccc2e511edd00fbbfcb5a10.patch
+https://gitlab.gnome.org/GNOME/libxml2/-/issues/255
+https://bugs.gentoo.org/790737
+
+From 7955b0d6fbbe49392ccc2e511edd00fbbfcb5a10 Mon Sep 17 00:00:00 2001
+From: Nick Wellnhofer <wellnhofer@aevum.de>
+Date: Tue, 18 May 2021 20:08:28 +0200
+Subject: [PATCH] Work around lxml API abuse
+
+Make xmlNodeDumpOutput and htmlNodeDumpFormatOutput work with corrupted
+parent pointers. This used to work with the old recursive code but the
+non-recursive rewrite required parent pointers to be set correctly.
+
+Unfortunately, lxml relies on the old behavior and passes subtrees with
+a corrupted structure. Fall back to a recursive function call if an
+invalid parent pointer is detected.
+
+Fixes #255.
+---
+ HTMLtree.c | 46 ++++++++++++++++++++++++++++------------------
+ xmlsave.c  | 31 +++++++++++++++++++++----------
+ 2 files changed, 49 insertions(+), 28 deletions(-)
+
+diff --git a/HTMLtree.c b/HTMLtree.c
+index 24434d453..bdd639c7f 100644
+--- a/HTMLtree.c
++++ b/HTMLtree.c
+@@ -744,7 +744,7 @@ void
+ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ 	                 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
+                          int format) {
+-    xmlNodePtr root;
++    xmlNodePtr root, parent;
+     xmlAttrPtr attr;
+     const htmlElemDesc * info;
+ 
+@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+     }
+ 
+     root = cur;
++    parent = cur->parent;
+     while (1) {
+         switch (cur->type) {
+         case XML_HTML_DOCUMENT_NODE:
+@@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+             if (((xmlDocPtr) cur)->intSubset != NULL) {
+                 htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
+             }
+-            if (cur->children != NULL) {
++            /* Always validate cur->parent when descending. */
++            if ((cur->parent == parent) && (cur->children != NULL)) {
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+             break;
+ 
+         case XML_ELEMENT_NODE:
++            /*
++             * Some users like lxml are known to pass nodes with a corrupted
++             * tree structure. Fall back to a recursive call to handle this
++             * case.
++             */
++            if ((cur->parent != parent) && (cur->children != NULL)) {
++                htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
++                break;
++            }
++
+             /*
+              * Get specific HTML info for that node.
+              */
+@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                     (cur->name != NULL) &&
+                     (cur->name[0] != 'p')) /* p, pre, param */
+                     xmlOutputBufferWriteString(buf, "\n");
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                 (info != NULL) && (!info->isinline)) {
+                 if ((cur->next->type != HTML_TEXT_NODE) &&
+                     (cur->next->type != HTML_ENTITY_REF_NODE) &&
+-                    (cur->parent != NULL) &&
+-                    (cur->parent->name != NULL) &&
+-                    (cur->parent->name[0] != 'p')) /* p, pre, param */
++                    (parent != NULL) &&
++                    (parent->name != NULL) &&
++                    (parent->name[0] != 'p')) /* p, pre, param */
+                     xmlOutputBufferWriteString(buf, "\n");
+             }
+ 
+@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                 break;
+             if (((cur->name == (const xmlChar *)xmlStringText) ||
+                  (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
+-                ((cur->parent == NULL) ||
+-                 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
+-                  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
++                ((parent == NULL) ||
++                 ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
++                  (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
+                 xmlChar *buffer;
+ 
+                 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                 break;
+             }
+ 
+-            /*
+-             * The parent should never be NULL here but we want to handle
+-             * corrupted documents gracefully.
+-             */
+-            if (cur->parent == NULL)
+-                return;
+-            cur = cur->parent;
++            cur = parent;
++            /* cur->parent was validated when descending. */
++            parent = cur->parent;
+ 
+             if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
+                 (cur->type == XML_DOCUMENT_NODE)) {
+@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                     (cur->next != NULL)) {
+                     if ((cur->next->type != HTML_TEXT_NODE) &&
+                         (cur->next->type != HTML_ENTITY_REF_NODE) &&
+-                        (cur->parent != NULL) &&
+-                        (cur->parent->name != NULL) &&
+-                        (cur->parent->name[0] != 'p')) /* p, pre, param */
++                        (parent != NULL) &&
++                        (parent->name != NULL) &&
++                        (parent->name[0] != 'p')) /* p, pre, param */
+                         xmlOutputBufferWriteString(buf, "\n");
+                 }
+             }
+diff --git a/xmlsave.c b/xmlsave.c
+index 61a40459b..aedbd5e70 100644
+--- a/xmlsave.c
++++ b/xmlsave.c
+@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ static void
+ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+     int format = ctxt->format;
+-    xmlNodePtr tmp, root, unformattedNode = NULL;
++    xmlNodePtr tmp, root, unformattedNode = NULL, parent;
+     xmlAttrPtr attr;
+     xmlChar *start, *end;
+     xmlOutputBufferPtr buf;
+@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+     buf = ctxt->buf;
+ 
+     root = cur;
++    parent = cur->parent;
+     while (1) {
+         switch (cur->type) {
+         case XML_DOCUMENT_NODE:
+@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+             break;
+ 
+         case XML_DOCUMENT_FRAG_NODE:
+-            if (cur->children != NULL) {
++            /* Always validate cur->parent when descending. */
++            if ((cur->parent == parent) && (cur->children != NULL)) {
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+             break;
+ 
+         case XML_ELEMENT_NODE:
+-	    if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
++            /*
++             * Some users like lxml are known to pass nodes with a corrupted
++             * tree structure. Fall back to a recursive call to handle this
++             * case.
++             */
++            if ((cur->parent != parent) && (cur->children != NULL)) {
++                xmlNodeDumpOutputInternal(ctxt, cur);
++                break;
++            }
++
++	    if ((ctxt->level > 0) && (ctxt->format == 1) &&
++                (xmlIndentTreeOutput))
+ 		xmlOutputBufferWrite(buf, ctxt->indent_size *
+ 				     (ctxt->level > ctxt->indent_nr ?
+ 				      ctxt->indent_nr : ctxt->level),
+@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+                 xmlOutputBufferWrite(buf, 1, ">");
+                 if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
+                 if (ctxt->level >= 0) ctxt->level++;
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+                 break;
+             }
+ 
+-            /*
+-             * The parent should never be NULL here but we want to handle
+-             * corrupted documents gracefully.
+-             */
+-            if (cur->parent == NULL)
+-                return;
+-            cur = cur->parent;
++            cur = parent;
++            /* cur->parent was validated when descending. */
++            parent = cur->parent;
+ 
+             if (cur->type == XML_ELEMENT_NODE) {
+                 if (ctxt->level > 0) ctxt->level--;
+-- 
+GitLab

diff --git a/dev-libs/libxml2/libxml2-2.9.12-r1.ebuild b/dev-libs/libxml2/libxml2-2.9.12-r1.ebuild
new file mode 100644
index 00000000000..2b005c416ec
--- /dev/null
+++ b/dev-libs/libxml2/libxml2-2.9.12-r1.ebuild
@@ -0,0 +1,245 @@
+# Copyright 1999-2021 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=7
+
+# Note: Please bump in sync with dev-libs/libxslt
+
+PYTHON_COMPAT=( python3_{7,8,9} )
+PYTHON_REQ_USE="xml"
+VERIFY_SIG_OPENPGP_KEY_PATH=/usr/share/openpgp-keys/danielveillard.asc
+inherit autotools flag-o-matic prefix python-r1 multilib-minimal verify-sig
+
+XSTS_HOME="http://www.w3.org/XML/2004/xml-schema-test-suite"
+XSTS_NAME_1="xmlschema2002-01-16"
+XSTS_NAME_2="xmlschema2004-01-14"
+XSTS_TARBALL_1="xsts-2002-01-16.tar.gz"
+XSTS_TARBALL_2="xsts-2004-01-14.tar.gz"
+XMLCONF_TARBALL="xmlts20130923.tar.gz"
+DESCRIPTION="XML C parser and toolkit"
+HOMEPAGE="http://www.xmlsoft.org/ https://gitlab.gnome.org/GNOME/libxml2"
+SRC_URI="
+	ftp://xmlsoft.org/${PN}/${PN}-${PV/_rc/-rc}.tar.gz
+	test? (
+		${XSTS_HOME}/${XSTS_NAME_1}/${XSTS_TARBALL_1}
+		${XSTS_HOME}/${XSTS_NAME_2}/${XSTS_TARBALL_2}
+		https://www.w3.org/XML/Test/${XMLCONF_TARBALL}
+	)
+	verify-sig? ( ftp://xmlsoft.org/${PN}/${PN}-${PV/_rc/-rc}.tar.gz.asc )
+"
+S="${WORKDIR}/${PN}-${PV%_rc*}"
+
+LICENSE="MIT"
+SLOT="2"
+KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86 ~x64-cygwin ~amd64-linux ~x86-linux ~ppc-macos ~x64-macos ~sparc-solaris ~sparc64-solaris ~x64-solaris ~x86-solaris"
+IUSE="debug examples icu ipv6 lzma +python readline static-libs test"
+RESTRICT="!test? ( test )"
+REQUIRED_USE="python? ( ${PYTHON_REQUIRED_USE} )"
+
+BDEPEND="
+	dev-util/gtk-doc-am
+	virtual/pkgconfig
+	verify-sig? ( app-crypt/openpgp-keys-danielveillard )
+"
+RDEPEND="
+	>=sys-libs/zlib-1.2.8-r1:=[${MULTILIB_USEDEP}]
+	icu? ( >=dev-libs/icu-51.2-r1:=[${MULTILIB_USEDEP}] )
+	lzma? ( >=app-arch/xz-utils-5.0.5-r1:=[${MULTILIB_USEDEP}] )
+	python? ( ${PYTHON_DEPS} )
+	readline? ( sys-libs/readline:= )
+"
+DEPEND="${RDEPEND}"
+
+MULTILIB_CHOST_TOOLS=(
+	/usr/bin/xml2-config
+)
+
+DOCS=( AUTHORS ChangeLog NEWS README TODO TODO_SCHEMAS )
+
+PATCHES=(
+	## Gentoo
+	# Patches needed for prefix support
+	"${FILESDIR}"/${PN}-2.7.1-catalog_path.patch
+
+	# Fix python detection, bug #567066
+	# https://bugzilla.gnome.org/show_bug.cgi?id=760458
+	"${FILESDIR}"/${PN}-2.9.2-python-ABIFLAG.patch
+
+	# Fix python tests when building out of tree #565576
+	"${FILESDIR}"/${PN}-2.9.8-out-of-tree-test.patch
+
+	# bug #745162
+	"${FILESDIR}"/${PN}-2.9.8-python3-unicode-errors.patch
+
+	# Avoid failure on missing fuzz.h when running tests
+	"${FILESDIR}"/${PN}-2.9.11-disable-fuzz-tests.patch
+
+	## Upstream
+	# Fix lxml compatibility
+	"${FILESDIR}"/${PN}-2.9.12-fix-lxml-compatibility.patch
+)
+
+src_unpack() {
+	local tarname=${P/_rc/-rc}.tar.gz
+
+	if use verify-sig ; then
+		verify-sig_verify_detached "${DISTDIR}"/${tarname}{,.asc}
+	fi
+
+	# ${A} isn't used to avoid unpacking of test tarballs into ${WORKDIR},
+	# as they are needed as tarballs in ${S}/xstc instead and not unpacked
+	unpack ${tarname}
+	cd "${S}" || die
+
+	if use test ; then
+		cp "${DISTDIR}/${XSTS_TARBALL_1}" \
+			"${DISTDIR}/${XSTS_TARBALL_2}" \
+			"${S}"/xstc/ \
+			|| die "Failed to install test tarballs"
+		unpack ${XMLCONF_TARBALL}
+	fi
+}
+
+src_prepare() {
+	default
+
+	eprefixify catalog.c xmlcatalog.c runtest.c xmllint.c
+
+	if [[ ${CHOST} == *-darwin* ]] ; then
+		# Avoid final linking arguments for python modules
+		sed -i -e '/PYTHON_LIBS/s/ldflags/libs/' configure.ac || die
+		# gcc-apple doesn't grok -Wno-array-bounds
+		sed -i -e 's/-Wno-array-bounds//' configure.ac || die
+	fi
+
+	# Please do not remove, as else we get references to PORTAGE_TMPDIR
+	# in /usr/lib/python?.?/site-packages/libxml2mod.la among things.
+	# We now need to run eautoreconf at the end to prevent maintainer mode.
+	#elibtoolize
+	eautoreconf
+}
+
+multilib_src_configure() {
+	# Filter seemingly problematic CFLAGS (#26320)
+	filter-flags -fprefetch-loop-arrays -funroll-loops
+
+	# Notes:
+	# 1) USE zlib support breaks gnome2
+	#    (libgnomeprint for instance fails to compile with
+	#    fresh install, and existing) - <azarah@gentoo.org> (22 Dec 2002).
+	#
+	# 2) The meaning of the 'debug' USE flag does not apply to the --with-debug
+	#    switch (enabling the libxml2 debug module). See bug #100898.
+	#
+	# 3) --with-mem-debug causes unusual segmentation faults (bug #105120).
+
+	libxml2_configure() {
+		ECONF_SOURCE="${S}" econf \
+			--with-html-subdir=${PF}/html \
+			$(use_with debug run-debug) \
+			$(use_with icu) \
+			$(use_with lzma) \
+			$(use_enable ipv6) \
+			$(use_enable static-libs static) \
+			$(multilib_native_use_with readline) \
+			$(multilib_native_use_with readline history) \
+			"$@"
+	}
+
+	libxml2_py_configure() {
+		# Ensure python build dirs exist
+		mkdir -p "${BUILD_DIR}" || die
+
+		# Odd build system, also see bug #582130
+		run_in_build_dir libxml2_configure \
+			"--with-python=${EPYTHON}" \
+			"--with-python-install-dir=$(python_get_sitedir)"
+	}
+
+	# Build python bindings separately
+	libxml2_configure --without-python
+
+	if multilib_is_native_abi && use python ; then
+		python_foreach_impl libxml2_py_configure
+	fi
+}
+
+libxml2_py_emake() {
+	pushd "${BUILD_DIR}/python" > /dev/null || die
+
+	emake "$@"
+
+	popd > /dev/null || die
+}
+
+multilib_src_compile() {
+	default
+
+	if multilib_is_native_abi && use python ; then
+		local native_builddir="${BUILD_DIR}"
+
+		python_foreach_impl libxml2_py_emake \
+				top_builddir="${native_builddir}" \
+				all
+	fi
+}
+
+multilib_src_test() {
+	ln -s "${S}"/xmlconf || die
+
+	emake check
+
+	if multilib_is_native_abi && use python ; then
+		python_foreach_impl libxml2_py_emake test
+	fi
+}
+
+multilib_src_install() {
+	emake \
+		DESTDIR="${D}" \
+		EXAMPLES_DIR="${EPREFIX}"/usr/share/doc/${PF}/examples \
+		install
+
+	if multilib_is_native_abi && use python ; then
+		python_foreach_impl libxml2_py_emake \
+			DESTDIR="${D}" \
+			docsdir="${EPREFIX}"/usr/share/doc/${PF}/python \
+			exampledir="${EPREFIX}"/usr/share/doc/${PF}/python/examples \
+			install
+
+		python_foreach_impl python_optimize
+	fi
+}
+
+multilib_src_install_all() {
+	rm -rf "${ED}"/usr/share/doc/${P}
+
+	einstalldocs
+
+	if ! use examples ; then
+		rm -rf "${ED}"/usr/share/doc/${PF}/examples
+		rm -rf "${ED}"/usr/share/doc/${PF}/python/examples
+	fi
+
+	find "${D}" -name '*.la' -delete || die
+}
+
+pkg_postinst() {
+	# We don't want to do the xmlcatalog during stage1, as xmlcatalog will not
+	# be in / and stage1 builds to ROOT=/tmp/stage1root. This fixes bug #208887.
+	if [[ -n "${ROOT}" ]]; then
+		elog "Skipping XML catalog creation for stage building (bug #208887)."
+	else
+		# Need an XML catalog, so no-one writes to a non-existent one
+		CATALOG="${EROOT}/etc/xml/catalog"
+
+		# We don't want to clobber an existing catalog though,
+		# only ensure that one is there
+		# <obz@gentoo.org>
+		if [[ ! -e "${CATALOG}" ]]; then
+			[[ -d "${EROOT}/etc/xml" ]] || mkdir -p "${EROOT}/etc/xml"
+			"${EPREFIX}"/usr/bin/xmlcatalog --create > "${CATALOG}"
+			einfo "Created XML catalog in ${CATALOG}"
+		fi
+	fi
+}


             reply	other threads:[~2021-05-20  1:47 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-20  1:46 Sam James [this message]
  -- strict thread matches above, loose matches on Subject: below --
2024-11-10 21:11 [gentoo-commits] repo/gentoo:master commit in: dev-libs/libxml2/, dev-libs/libxml2/files/ Sam James
2023-05-10 22:01 Sam James
2022-11-29 19:59 David Seifert
2021-10-29 13:46 David Seifert
2021-05-13 17:50 Sam James
2020-12-03  0:14 Sam James
2020-10-30 22:39 Matt Turner
2020-10-30 22:34 Matt Turner
2020-01-01 16:28 Mart Raudsepp
2019-03-30 20:57 Mart Raudsepp
2019-01-03 11:22 Mike Frysinger
2019-01-03 11:22 Mike Frysinger
2019-01-03 11:22 Mike Frysinger
2018-07-25 16:47 Mart Raudsepp
2015-11-29 11:58 Justin Lecher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1621475171.6d220e09b25048d28d6598f8a6ffb62f0fbe92b4.sam@gentoo \
    --to=sam@gentoo.org \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox