From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by finch.gentoo.org (Postfix) with ESMTPS id C489515800A for ; Sun, 30 Jul 2023 19:02:31 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 0E9ABE0B97; Sun, 30 Jul 2023 19:02:31 +0000 (UTC) Received: from smtp.gentoo.org (woodpecker.gentoo.org [IPv6:2001:470:ea4a:1:5054:ff:fec7:86e4]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id E0B8AE0B97 for ; Sun, 30 Jul 2023 19:02:30 +0000 (UTC) Received: from oystercatcher.gentoo.org (oystercatcher.gentoo.org [148.251.78.52]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id E8D4733C84E for ; Sun, 30 Jul 2023 19:02:29 +0000 (UTC) Received: from localhost.localdomain (localhost [IPv6:::1]) by oystercatcher.gentoo.org (Postfix) with ESMTP id 3260CA84 for ; Sun, 30 Jul 2023 19:02:28 +0000 (UTC) From: "Sam James" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "Sam James" Message-ID: <1690738247.5d5b708b7e6f858c3fc2d6a421fb424225efdb04.sam@gentoo> Subject: [gentoo-commits] proj/gcc-patches:master commit in: 13.2.0/gentoo/ X-VCS-Repository: proj/gcc-patches X-VCS-Files: 13.2.0/gentoo/31_all_gm2_make_P_var.patch 13.2.0/gentoo/82_all_arm64_PR110280_ICE_fold-const.patch 13.2.0/gentoo/83_all_all_PR110315_crash_large_std_vector.patch 
13.2.0/gentoo/README.history X-VCS-Directories: 13.2.0/gentoo/ X-VCS-Committer: sam X-VCS-Committer-Name: Sam James X-VCS-Revision: 5d5b708b7e6f858c3fc2d6a421fb424225efdb04 X-VCS-Branch: master Date: Sun, 30 Jul 2023 19:02:28 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Auto-Response-Suppress: DR, RN, NRN, OOF, AutoReply X-Archives-Salt: 54477e10-d9de-4566-a341-96eca63d450b X-Archives-Hash: d694751569e219216afb7687cde571d6 commit: 5d5b708b7e6f858c3fc2d6a421fb424225efdb04 Author: Sam James gentoo org> AuthorDate: Sun Jul 30 17:30:47 2023 +0000 Commit: Sam James gentoo org> CommitDate: Sun Jul 30 17:30:47 2023 +0000 URL: https://gitweb.gentoo.org/proj/gcc-patches.git/commit/?id=5d5b708b 13.2.0: backport a few patches, cut patchset 4 Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110280 Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110315 Signed-off-by: Sam James gentoo.org> 13.2.0/gentoo/31_all_gm2_make_P_var.patch | 16 +- .../82_all_arm64_PR110280_ICE_fold-const.patch | 53 ++++ ...3_all_all_PR110315_crash_large_std_vector.patch | 353 +++++++++++++++++++++ 13.2.0/gentoo/README.history | 6 + 4 files changed, 421 insertions(+), 7 deletions(-) diff --git a/13.2.0/gentoo/31_all_gm2_make_P_var.patch b/13.2.0/gentoo/31_all_gm2_make_P_var.patch index c977874..ef34288 100644 --- a/13.2.0/gentoo/31_all_gm2_make_P_var.patch +++ b/13.2.0/gentoo/31_all_gm2_make_P_var.patch @@ -1,8 +1,8 @@ https://bugs.gentoo.org/904714 https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=79c73122fab213f218b85b2c579ffe3cf5e98ad0 -From 79c73122fab213f218b85b2c579ffe3cf5e98ad0 Mon Sep 17 00:00:00 2001 -From: =?utf8?q?Arsen=20Arsenovi=C4=87?= +From 275c516a40b7044895c4920f52ec19c7bceedd54 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Arsen=20Arsenovi=C4=87?= Date: Fri, 21 Apr 2023 18:07:29 +0200 Subject: [PATCH] gcc/m2: Drop references to $(P) @@ -14,9 +14,11 @@ gcc/m2/ChangeLog: * 
Make-lang.in: Remove references to $(P). * Make-maintainer.in: Ditto. + +(cherry picked from commit 79c73122fab213f218b85b2c579ffe3cf5e98ad0) --- a/gcc/m2/Make-lang.in +++ b/gcc/m2/Make-lang.in -@@ -514,7 +514,7 @@ GM2_LIBS_BOOT = m2/gm2-compiler-boot/gm2.a \ +@@ -515,7 +515,7 @@ GM2_LIBS_BOOT = m2/gm2-compiler-boot/gm2.a \ cc1gm2$(exeext): m2/stage1/cc1gm2$(exeext) $(m2.prev) cp -p $< $@ @@ -25,7 +27,7 @@ gcc/m2/ChangeLog: $(GM2_C_OBJS) $(BACKEND) $(LIBDEPS) $(GM2_LIBS) \ m2/gm2-gcc/rtegraph.o plugin/m2rte$(soext) -test -d $(@D) || $(mkinstalldirs) $(@D) -@@ -527,7 +527,7 @@ m2/stage2/cc1gm2$(exeext): m2/stage1/cc1gm2$(exeext) m2/gm2-compiler/m2flex.o $( +@@ -528,7 +528,7 @@ m2/stage2/cc1gm2$(exeext): m2/stage1/cc1gm2$(exeext) m2/gm2-compiler/m2flex.o $( @$(call LINK_PROGRESS,$(INDEX.m2),end) m2/stage1/cc1gm2$(exeext): gm2$(exeext) m2/gm2-compiler-boot/m2flex.o \ @@ -36,14 +38,14 @@ gcc/m2/ChangeLog: $(m2.prev) --- a/gcc/m2/Make-maintainer.in +++ b/gcc/m2/Make-maintainer.in -@@ -753,7 +753,7 @@ GM2_LIBS_PARANOID = m2/gm2-compiler-paranoid/gm2.a \ +@@ -848,7 +848,7 @@ GM2_LIBS_PARANOID = m2/gm2-compiler-paranoid/gm2.a \ gm2.paranoid: m2/m2obj3/cc1gm2$(exeext) gm2.verifyparanoid m2/m2obj3/cc1gm2$(exeext): m2/m2obj2/cc1gm2$(exeext) m2/gm2-compiler-paranoid/m2flex.o \ - $(P) $(GM2_C_OBJS) $(BACKEND) $(LIBDEPS) $(GM2_LIBS_PARANOID) \ + $(GM2_C_OBJS) $(BACKEND) $(LIBDEPS) $(GM2_LIBS_PARANOID) \ - m2/gm2-gcc/rtegraph.o plugin/m2rte$(exeext).so m2/gm2-libs-boot/M2LINK.o + m2/gm2-gcc/rtegraph.o plugin/m2rte$(exeext).so -test -d m2/m2obj3 || $(mkinstalldirs) m2/m2obj3 @$(call LINK_PROGRESS,$(INDEX.m2),start) -- -2.31.1 +2.41.0 diff --git a/13.2.0/gentoo/82_all_arm64_PR110280_ICE_fold-const.patch b/13.2.0/gentoo/82_all_arm64_PR110280_ICE_fold-const.patch new file mode 100644 index 0000000..d27ca7a --- /dev/null +++ b/13.2.0/gentoo/82_all_arm64_PR110280_ICE_fold-const.patch @@ -0,0 +1,53 @@ +https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110280 
+https://inbox.sourceware.org/gcc-patches/nycvar.YFH.7.77.849.2307270634430.12935@jbgna.fhfr.qr/T/#t + +From 85d8e0d8d5342ec8b4e6a54e22741c30b33c6f04 Mon Sep 17 00:00:00 2001 +From: Prathamesh Kulkarni +Date: Fri, 23 Jun 2023 15:27:17 +0530 +Subject: [PATCH] [aarch64/match.pd] Fix ICE observed in PR110280. + +gcc/ChangeLog: + PR tree-optimization/110280 + * match.pd (vec_perm_expr(v, v, mask) -> v): Explicitly build vector + using build_vector_from_val with the element of input operand, and + mask's type if operand and mask's types don't match. + +gcc/testsuite/ChangeLog: + PR tree-optimization/110280 + * gcc.target/aarch64/sve/pr110280.c: New test. + +(cherry picked from commit 85d8e0d8d5342ec8b4e6a54e22741c30b33c6f04) + +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -8292,7 +8292,14 @@ and, + + (simplify + (vec_perm vec_same_elem_p@0 @0 @1) +- @0) ++ (if (types_match (type, TREE_TYPE (@0))) ++ @0 ++ (with ++ { ++ tree elem = uniform_vector_p (@0); ++ } ++ (if (elem) ++ { build_vector_from_val (type, elem); })))) + + /* Push VEC_PERM earlier if that may help FMA perception (PR101895). 
*/ + (simplify +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pr110280.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fdump-tree-optimized" } */ ++ ++#include "arm_sve.h" ++ ++svuint32_t l() ++{ ++ _Alignas(16) const unsigned int lanes[4] = {0, 0, 0, 0}; ++ return svld1rq_u32(svptrue_b8(), lanes); ++} ++ ++/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ diff --git a/13.2.0/gentoo/83_all_all_PR110315_crash_large_std_vector.patch b/13.2.0/gentoo/83_all_all_PR110315_crash_large_std_vector.patch new file mode 100644 index 0000000..7c854ff --- /dev/null +++ b/13.2.0/gentoo/83_all_all_PR110315_crash_large_std_vector.patch @@ -0,0 +1,353 @@ +https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110315 (specifically https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110315#c7) + +From 777aa930b106fea2dd6ed9fe22b42a2717f1472d Mon Sep 17 00:00:00 2001 +From: Aldy Hernandez +Date: Mon, 15 May 2023 12:25:58 +0200 +Subject: [PATCH] [GCC13] Add auto-resizing capability to irange's [PR109695] + +Backport the following from trunk. + + Note that the patch has been adapted to trees. + + The numbers for various sub-ranges on GCC13 are: + < 2> = 64 bytes, -3.02% for VRP. + < 3> = 80 bytes, -2.67% for VRP. + < 8> = 160 bytes, -2.46% for VRP. + <16> = 288 bytes, -2.40% for VRP. + + +We can now have int_range for automatically +resizable ranges. int_range_max is now int_range<3, true> +for a 69X reduction in size from current trunk, and 6.9X reduction from +GCC12. This incurs a 5% performance penalty for VRP that is more than +covered by our > 13% improvements recently. + + +int_range_max is the temporary range object we use in the ranger for +integers. With the conversion to wide_int, this structure bloated up +significantly because wide_ints are huge (80 bytes a piece) and are +about 10 times as big as a plain tree. Since the temporary object +requires 255 sub-ranges, that's 255 * 80 * 2, plus the control word. 
+This means the structure grew from 4112 bytes to 40912 bytes. + +This patch adds the ability to resize ranges as needed, defaulting to +no resizing, while int_range_max now defaults to 3 sub-ranges (instead +of 255) and grows to 255 when the range being calculated does not fit. + +For example: + +int_range<1> foo; // 1 sub-range with no resizing. +int_range<5> foo; // 5 sub-ranges with no resizing. +int_range<5, true> foo; // 5 sub-ranges with resizing. + +I ran some tests and found that 3 sub-ranges cover 99% of cases, so +I've set the int_range_max default to that: + + typedef int_range<3, /*RESIZABLE=*/true> int_range_max; + +We don't bother growing incrementally, since the default covers most +cases and we have a 255 hard-limit. This hard limit could be reduced +to 128, since my tests never saw a range needing more than 124, but we +could do that as a follow-up if needed. + +With 3-subranges, int_range_max is now 592 bytes versus 40912 for +trunk, and versus 4112 bytes for GCC12! The penalty is 5.04% for VRP +and 3.02% for threading, with no noticeable change in overall +compilation (0.27%). This is more than covered by our 13.26% +improvements for the legacy removal + wide_int conversion. + +I think this approach is a good alternative, while providing us with +flexibility going forward. For example, we could try defaulting to a +8 sub-ranges for a noticeable improvement in VRP. We could also use +large sub-ranges for switch analysis to avoid resizing. + +Another approach I tried was always resizing. With this, we could +drop the whole int_range nonsense, and have irange just hold a +resizable range. This simplified things, but incurred a 7% penalty on +ipa_cp. This was hard to pinpoint, and I'm not entirely convinced +this wasn't some artifact of valgrind. However, until we're sure, +let's avoid massive changes, especially since IPA changes are coming +up. 
+ +For the curious, a particular hot spot for IPA in this area was: + +ipcp_vr_lattice::meet_with_1 (const value_range *other_vr) +{ +... +... + value_range save (m_vr); + m_vr.union_ (*other_vr); + return m_vr != save; +} + +The problem isn't the resizing (since we do that at most once) but the +fact that for some functions with lots of callers we end up a huge +range that gets copied and compared for every meet operation. Maybe +the IPA algorithm could be adjusted somehow??. + +Anywhooo... for now there is nothing to worry about, since value_range +still has 2 subranges and is not resizable. But we should probably +think what if anything we want to do here, as I envision IPA using +infinite ranges here (well, int_range_max) and handling frange's, etc. + +gcc/ChangeLog: + + PR tree-optimization/109695 + * value-range.cc (irange::operator=): Resize range. + (irange::union_): Same. + (irange::intersect): Same. + (irange::invert): Same. + (int_range_max): Default to 3 sub-ranges and resize as needed. + * value-range.h (irange::maybe_resize): New. + (~int_range): New. + (int_range::int_range): Adjust for resizing. + (int_range::operator=): Same. +--- a/gcc/value-range-storage.h ++++ b/gcc/value-range-storage.h +@@ -184,7 +184,7 @@ vrange_allocator::alloc_irange (unsigned num_pairs) + // Allocate the irange and required memory for the vector. 
+ void *r = alloc (sizeof (irange)); + tree *mem = static_cast (alloc (nbytes)); +- return new (r) irange (mem, num_pairs); ++ return new (r) irange (mem, num_pairs, /*resizable=*/false); + } + + inline frange * +--- a/gcc/value-range.cc ++++ b/gcc/value-range.cc +@@ -831,6 +831,10 @@ irange::operator= (const irange &src) + copy_to_legacy (src); + return *this; + } ++ ++ int needed = src.num_pairs (); ++ maybe_resize (needed); ++ + if (src.legacy_mode_p ()) + { + copy_legacy_to_multi_range (src); +@@ -2506,6 +2510,7 @@ irange::irange_union (const irange &r) + // Now it simply needs to be copied, and if there are too many + // ranges, merge some. We wont do any analysis as to what the + // "best" merges are, simply combine the final ranges into one. ++ maybe_resize (i / 2); + if (i > m_max_ranges * 2) + { + res[m_max_ranges * 2 - 1] = res[i - 1]; +@@ -2605,6 +2610,11 @@ irange::irange_intersect (const irange &r) + if (r.irange_contains_p (*this)) + return intersect_nonzero_bits (r); + ++ // ?? We could probably come up with something smarter than the ++ // worst case scenario here. ++ int needed = num_pairs () + r.num_pairs (); ++ maybe_resize (needed); ++ + signop sign = TYPE_SIGN (TREE_TYPE(m_base[0])); + unsigned bld_pair = 0; + unsigned bld_lim = m_max_ranges; +@@ -2831,6 +2841,11 @@ irange::invert () + m_num_ranges = 1; + return; + } ++ ++ // At this point, we need one extra sub-range to represent the ++ // inverse. ++ maybe_resize (m_num_ranges + 1); ++ + // The algorithm is as follows. To calculate INVERT ([a,b][c,d]), we + // generate [-MIN, a-1][b+1, c-1][d+1, MAX]. + // +--- a/gcc/value-range.h ++++ b/gcc/value-range.h +@@ -172,7 +172,8 @@ public: + bool legacy_verbose_intersect (const irange *); // DEPRECATED + + protected: +- irange (tree *, unsigned); ++ void maybe_resize (int needed); ++ irange (tree *, unsigned nranges, bool resizable); + // potential promotion to public? 
+ tree tree_lower_bound (unsigned = 0) const; + tree tree_upper_bound (unsigned) const; +@@ -200,6 +201,8 @@ protected: + void copy_to_legacy (const irange &); + void copy_legacy_to_multi_range (const irange &); + ++ // Hard limit on max ranges allowed. ++ static const int HARD_MAX_RANGES = 255; + private: + friend void gt_ggc_mx (irange *); + friend void gt_pch_nx (irange *); +@@ -214,15 +217,21 @@ private: + + bool intersect (const wide_int& lb, const wide_int& ub); + unsigned char m_num_ranges; ++ bool m_resizable; + unsigned char m_max_ranges; + tree m_nonzero_mask; ++protected: + tree *m_base; + }; + + // Here we describe an irange with N pairs of ranges. The storage for + // the pairs is embedded in the class as an array. ++// ++// If RESIZABLE is true, the storage will be resized on the heap when ++// the number of ranges needed goes past N up to a max of ++// HARD_MAX_RANGES. This new storage is freed upon destruction. + +-template ++template + class GTY((user)) int_range : public irange + { + public: +@@ -233,7 +242,7 @@ public: + int_range (tree type); + int_range (const int_range &); + int_range (const irange &); +- virtual ~int_range () = default; ++ virtual ~int_range (); + int_range& operator= (const int_range &); + private: + template friend void gt_ggc_mx (int_range *); +@@ -472,6 +481,38 @@ is_a (vrange &v) + return v.m_discriminator == VR_FRANGE; + } + ++// For resizable ranges, resize the range up to HARD_MAX_RANGES if the ++// NEEDED pairs is greater than the current capacity of the range. 
++ ++inline void ++irange::maybe_resize (int needed) ++{ ++ if (!m_resizable || m_max_ranges == HARD_MAX_RANGES) ++ return; ++ ++ if (needed > m_max_ranges) ++ { ++ m_max_ranges = HARD_MAX_RANGES; ++ tree *newmem = new tree[m_max_ranges * 2]; ++ memcpy (newmem, m_base, sizeof (tree) * num_pairs () * 2); ++ m_base = newmem; ++ } ++} ++ ++template ++inline ++int_range::~int_range () ++{ ++ if (RESIZABLE && m_base != m_ranges) ++ delete m_base; ++} ++ ++// This is an "infinite" precision irange for use in temporary ++// calculations. It starts with a sensible default covering 99% of ++// uses, and goes up to HARD_MAX_RANGES when needed. Any allocated ++// storage is freed upon destruction. ++typedef int_range<3, /*RESIZABLE=*/true> int_range_max; ++ + class vrange_visitor + { + public: +@@ -490,10 +531,6 @@ public: + // There are copy operators to seamlessly copy to/fro multi-ranges. + typedef int_range<1> value_range; + +-// This is an "infinite" precision irange for use in temporary +-// calculations. +-typedef int_range<255> int_range_max; +- + // This is an "infinite" precision range object for use in temporary + // calculations for any of the handled types. The object can be + // transparently used as a vrange. +@@ -872,64 +909,65 @@ gt_pch_nx (int_range *x, gt_pointer_operator op, void *cookie) + // Constructors for irange + + inline +-irange::irange (tree *base, unsigned nranges) ++irange::irange (tree *base, unsigned nranges, bool resizable) + { + m_discriminator = VR_IRANGE; + m_base = base; + m_max_ranges = nranges; ++ m_resizable = resizable; + set_undefined (); + } + + // Constructors for int_range<>. 
+ +-template<unsigned N> ++template<unsigned N, bool RESIZABLE> + inline +-int_range<N>::int_range () +- : irange (m_ranges, N) ++int_range<N, RESIZABLE>::int_range () ++ : irange (m_ranges, N, RESIZABLE) + { + } + +-template<unsigned N> +-int_range<N>::int_range (const int_range &other) +- : irange (m_ranges, N) ++template<unsigned N, bool RESIZABLE> ++int_range<N, RESIZABLE>::int_range (const int_range &other) ++ : irange (m_ranges, N, RESIZABLE) + { + irange::operator= (other); + } + +-template<unsigned N> +-int_range<N>::int_range (tree min, tree max, value_range_kind kind) +- : irange (m_ranges, N) ++template<unsigned N, bool RESIZABLE> ++int_range<N, RESIZABLE>::int_range (tree min, tree max, value_range_kind kind) ++ : irange (m_ranges, N, RESIZABLE) + { + irange::set (min, max, kind); + } + +-template<unsigned N> +-int_range<N>::int_range (tree type) +- : irange (m_ranges, N) ++template<unsigned N, bool RESIZABLE> ++int_range<N, RESIZABLE>::int_range (tree type) ++ : irange (m_ranges, N, RESIZABLE) + { + set_varying (type); + } + +-template<unsigned N> +-int_range<N>::int_range (tree type, const wide_int &wmin, const wide_int &wmax, ++template<unsigned N, bool RESIZABLE> ++int_range<N, RESIZABLE>::int_range (tree type, const wide_int &wmin, const wide_int &wmax, + value_range_kind kind) +- : irange (m_ranges, N) ++ : irange (m_ranges, N, RESIZABLE) + { + tree min = wide_int_to_tree (type, wmin); + tree max = wide_int_to_tree (type, wmax); + set (min, max, kind); + } + +-template<unsigned N> +-int_range<N>::int_range (const irange &other) +- : irange (m_ranges, N) ++template<unsigned N, bool RESIZABLE> ++int_range<N, RESIZABLE>::int_range (const irange &other) ++ : irange (m_ranges, N, RESIZABLE) + { + irange::operator= (other); + } + +-template<unsigned N> +-int_range<N>& +-int_range<N>::operator= (const int_range &src) ++template<unsigned N, bool RESIZABLE> ++int_range<N, RESIZABLE>& ++int_range<N, RESIZABLE>::operator= (const int_range &src) + { + irange::operator= (src); + return *this; +-- +2.40.0 diff --git a/13.2.0/gentoo/README.history b/13.2.0/gentoo/README.history index be66787..998f555 100644 --- a/13.2.0/gentoo/README.history +++ b/13.2.0/gentoo/README.history @@ -1,3 +1,9 @@ +4 30 Jul 2023 + + U 31_all_gm2_make_P_var.patch + + 82_all_arm64_PR110280_ICE_fold-const.patch + + 83_all_all_PR110315_crash_large_std_vector.patch + 3 26 May 2023 + 
76_all_match.pd-don-t-emit-label-if-not-needed.patch