public inbox for gentoo-dev@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-dev] Proposal to undeprecate EGO_SUM
@ 2022-06-13  7:44 Florian Schmaus
  2022-06-13  7:44 ` [gentoo-dev] [PATCH] go-module.eclass: " Florian Schmaus
                   ` (4 more replies)
  0 siblings, 5 replies; 58+ messages in thread
From: Florian Schmaus @ 2022-06-13  7:44 UTC (permalink / raw
  To: gentoo-dev; +Cc: William Hubbs

Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
where some voices were in agreement that EGO_SUM has its raison d'être,
while there were no arguments in favor of eventually removing EGO_SUM,
I hereby propose to undeprecate EGO_SUM.

1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa




^ permalink raw reply	[flat|nested] 58+ messages in thread

* [gentoo-dev] [PATCH] go-module.eclass: undeprecate EGO_SUM
  2022-06-13  7:44 [gentoo-dev] Proposal to undeprecate EGO_SUM Florian Schmaus
@ 2022-06-13  7:44 ` Florian Schmaus
  2022-06-13  9:49   ` Andrew Ammerlaan
  2022-06-17 15:53   ` William Hubbs
  2022-06-13  8:29 ` [gentoo-dev] Proposal to " Michał Górny
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 58+ messages in thread
From: Florian Schmaus @ 2022-06-13  7:44 UTC (permalink / raw
  To: gentoo-dev; +Cc: William Hubbs, Florian Schmaus

Following the gentoo-dev@ mailing list discussion [1], this
un-deprecates EGO_SUM.

1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa

Signed-off-by: Florian Schmaus <flow@gentoo.org>
---
 eclass/go-module.eclass | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/eclass/go-module.eclass b/eclass/go-module.eclass
index a5dafb45cab8..2e0b7dc1726d 100644
--- a/eclass/go-module.eclass
+++ b/eclass/go-module.eclass
@@ -25,7 +25,9 @@
 #
 # If the software has a directory named vendor in its
 # top level directory, the only thing you need to do is inherit the
-# eclass. If it doesn't, you need to also create a dependency tarball and
+# eclass. If it doesn't, you must either 1) populate EGO_SUM in the ebuild and
+# call go-module_set_globals as discussed below, or 2) create a
+# dependency tarball and
 # host it somewhere, for example in your dev space.
 #
 # Here is an example of how to create a dependency tarball.
@@ -52,9 +54,14 @@
 #
 # inherit go-module
 #
+# EGO_SUM=(
+#   "github.com/aybabtme/rgbterm v0.0.0-20170906152045-cc83f3b3ce59/go.mod"
+#   "github.com/aybabtme/rgbterm v0.0.0-20170906152045-cc83f3b3ce59"
+# )
+#
+# go-module_set_globals
 # SRC_URI="https://github.com/example/${PN}/archive/v${PV}.tar.gz -> ${P}.tar.gz"
-# Add this line if you have a dependency tarball.
-# SRC_URI+=" ${P}-deps.tar.xz"
+# SRC_URI+=" ${EGO_SUM_SRC_URI}"
 #
 # @CODE
 
@@ -105,11 +112,7 @@ QA_FLAGS_IGNORED='.*'
 RESTRICT+=" strip"
 
 # @ECLASS_VARIABLE: EGO_SUM
-# @DEPRECATED: none
 # @DESCRIPTION:
-# This is replaced by a dependency tarball, see above for how to create
-# one.
-#
 # This array is based on the contents of the go.sum file from the top
 # level directory of the software you are packaging. Each entry must be
 # quoted and contain the first two fields of a line from go.sum.
@@ -153,7 +156,6 @@ RESTRICT+=" strip"
 # go.sum copy of the Hash1 values during building of the package.
 
 # @ECLASS_VARIABLE: _GOMODULE_GOPROXY_BASEURI
-# @DEPRECATED: none
 # @DESCRIPTION:
 # Golang module proxy service to fetch module files from. Note that the module
 # proxy generally verifies modules via the Hash1 code.
@@ -176,7 +178,6 @@ RESTRICT+=" strip"
 : "${_GOMODULE_GOPROXY_BASEURI:=mirror://goproxy/}"
 
 # @ECLASS_VARIABLE: _GOMODULE_GOSUM_REVERSE_MAP
-# @DEPRECATED: none
 # @DESCRIPTION:
 # Mapping back from Gentoo distfile name to upstream distfile path.
 # Associative array to avoid O(N*M) performance when populating the GOPROXY
@@ -206,7 +207,6 @@ ego() {
 }
 
 # @FUNCTION: go-module_set_globals
-# @DEPRECATED: none
 # @DESCRIPTION:
 # Convert the information in EGO_SUM for other usage in the ebuild.
 # - Populates EGO_SUM_SRC_URI that can be added to SRC_URI
@@ -297,7 +297,6 @@ go-module_set_globals() {
 }
 
 # @FUNCTION: go-module_setup_proxy
-# @DEPRECATED: none
 # @DESCRIPTION:
 # If your ebuild redefines src_unpack and uses EGO_SUM you need to call
 # this function in src_unpack.
@@ -341,14 +340,11 @@ go-module_setup_proxy() {
 # @FUNCTION: go-module_src_unpack
 # @DESCRIPTION:
 # If EGO_SUM is set, unpack the base tarball(s) and set up the
-#   local go proxy. Also warn that this usage is deprecated.
+#   local go proxy.
 # - Otherwise, if EGO_VENDOR is set, bail out.
 # - Otherwise do a normal unpack.
 go-module_src_unpack() {
 	if [[ "${#EGO_SUM[@]}" -gt 0 ]]; then
-		eqawarn "This ebuild uses EGO_SUM which is deprecated"
-		eqawarn "Please migrate to a dependency tarball"
-		eqawarn "This will become a fatal error in the future"
 		_go-module_src_unpack_gosum
 	elif [[ "${#EGO_VENDOR[@]}" -gt 0 ]]; then
 		eerror "${EBUILD} is using EGO_VENDOR which is no longer supported"
@@ -359,7 +355,6 @@ go-module_src_unpack() {
 }
 
 # @FUNCTION: _go-module_src_unpack_gosum
-# @DEPRECATED: none
 # @DESCRIPTION:
 # Populate a GOPROXY directory hierarchy with distfiles from EGO_SUM and
 # unpack the base distfiles.
@@ -405,7 +400,6 @@ _go-module_src_unpack_gosum() {
 }
 
 # @FUNCTION: _go-module_gosum_synthesize_files
-# @DEPRECATED: none
 # @DESCRIPTION:
 # Given a path &  version, populate all Goproxy metadata files which aren't
 # needed to be downloaded directly.
@@ -433,7 +427,6 @@ _go-module_gosum_synthesize_files() {
 }
 
 # @FUNCTION: _go-module_src_unpack_verify_gosum
-# @DEPRECATED: none
 # @DESCRIPTION:
 # Validate the Go modules declared by EGO_SUM are sufficient to cover building
 # the package, without actually building it yet.
@@ -482,7 +475,6 @@ go-module_live_vendor() {
 }
 
 # @FUNCTION: _go-module_gomod_encode
-# @DEPRECATED: none
 # @DESCRIPTION:
 # Encode the name(path) of a Golang module in the format expected by Goproxy.
 #
-- 
2.35.1



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  7:44 [gentoo-dev] Proposal to undeprecate EGO_SUM Florian Schmaus
  2022-06-13  7:44 ` [gentoo-dev] [PATCH] go-module.eclass: " Florian Schmaus
@ 2022-06-13  8:29 ` Michał Górny
  2022-06-13  8:49   ` Ulrich Mueller
                     ` (2 more replies)
  2022-06-14 17:34 ` [gentoo-dev] " Arsen Arsenović
                   ` (2 subsequent siblings)
  4 siblings, 3 replies; 58+ messages in thread
From: Michał Górny @ 2022-06-13  8:29 UTC (permalink / raw
  To: gentoo-dev; +Cc: William Hubbs

On Mon, 2022-06-13 at 09:44 +0200, Florian Schmaus wrote:
> Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
> where some voices where in agreement that EGO_SUM has its raison d'être,
> while there where no arguments in favor of eventually removing EGO_SUM,
> I hereby propose to undeprecate EGO_SUM.
> 
> 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa
> 

"We've been rehashing the discussion until all opposition got tired
and stopped replying, then we claim everyone agrees".

-- 
Best regards,
Michał Górny



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  8:29 ` [gentoo-dev] Proposal to " Michał Górny
@ 2022-06-13  8:49   ` Ulrich Mueller
  2022-06-13  9:34     ` Florian Schmaus
  2022-06-13  9:30   ` Florian Schmaus
  2022-06-14  9:37   ` Michał Górny
  2 siblings, 1 reply; 58+ messages in thread
From: Ulrich Mueller @ 2022-06-13  8:49 UTC (permalink / raw
  To: Michał Górny; +Cc: gentoo-dev, William Hubbs

[-- Attachment #1: Type: text/plain, Size: 944 bytes --]

>>>>> On Mon, 13 Jun 2022, Michał Górny wrote:

> On Mon, 2022-06-13 at 09:44 +0200, Florian Schmaus wrote:
>> Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
>> where some voices where in agreement that EGO_SUM has its raison d'être,
>> while there where no arguments in favor of eventually removing EGO_SUM,
>> I hereby propose to undeprecate EGO_SUM.
>> 
>> 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa

Can this be done without requesting changes to package managers?
Previous examples are unexporting variables because their size exceeds
the limit of the Linux kernel [2], or introduction of additional phase
functions that bypass Manifest validation [3].

> "We've been rehashing the discussion until all opposition got tired
> and stopped replying, then we claim everyone agrees".

[2] https://bugs.gentoo.org/721088
[3] https://bugs.gentoo.org/833567

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 507 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  8:29 ` [gentoo-dev] Proposal to " Michał Górny
  2022-06-13  8:49   ` Ulrich Mueller
@ 2022-06-13  9:30   ` Florian Schmaus
  2022-06-13 11:03     ` Michał Górny
  2022-06-14  9:37   ` Michał Górny
  2 siblings, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-06-13  9:30 UTC (permalink / raw
  To: gentoo-dev

On 13/06/2022 10.29, Michał Górny wrote:
> On Mon, 2022-06-13 at 09:44 +0200, Florian Schmaus wrote:
>> Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
>> where some voices where in agreement that EGO_SUM has its raison d'être,
>> while there where no arguments in favor of eventually removing EGO_SUM,
>> I hereby propose to undeprecate EGO_SUM.
>>
>> 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa
>>
> 
> "We've been rehashing the discussion until all opposition got tired
> and stopped replying, then we claim everyone agrees".

I take this comment to mean that there was already a discussion about 
deprecating and removing EGO_SUM. I usually try to follow what's going 
on in Gentoo and I remember the discussion about introducing dependency 
tarballs. But I apparently have missed the part where EGO_SUM was slated 
for removal. And it appears I am not the only one, at least Ionen also 
wrote "Missed bits and pieces but was never quite sure why this went 
toward full deprecation, just discouraged may have been fair enough, …".

In any case, I am sorry for bringing this discussion up again. But since 
I started rehashing this, no arguments why EGO_SUM should be removed 
have been provided. And so far, I failed to find the old discussions 
where I'd hope to find some rationale behind the deprecation of EGO_SUM. :/

- Flow




^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  8:49   ` Ulrich Mueller
@ 2022-06-13  9:34     ` Florian Schmaus
  2022-06-13 10:26       ` Ulrich Mueller
  0 siblings, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-06-13  9:34 UTC (permalink / raw
  To: gentoo-dev

On 13/06/2022 10.49, Ulrich Mueller wrote:
>>>>>> On Mon, 13 Jun 2022, Michał Górny wrote:
> 
>> On Mon, 2022-06-13 at 09:44 +0200, Florian Schmaus wrote:
>>> Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
>>> where some voices where in agreement that EGO_SUM has its raison d'être,
>>> while there where no arguments in favor of eventually removing EGO_SUM,
>>> I hereby propose to undeprecate EGO_SUM.
>>>
>>> 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa
> 
> Can this be done without requesting changes to package managers?

What is 'this' here? The patchset does not make changes to any package 
manager, just the go-module eclass.

Note that this is not about finding an alternative to dependency 
tarballs. It is just about re-allowing EGO_SUM in addition to dependency 
tarballs for packaging Go software in Gentoo.

- Flow



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] [PATCH] go-module.eclass: undeprecate EGO_SUM
  2022-06-13  7:44 ` [gentoo-dev] [PATCH] go-module.eclass: " Florian Schmaus
@ 2022-06-13  9:49   ` Andrew Ammerlaan
  2022-06-13 10:25     ` Florian Schmaus
  2022-06-17 15:53   ` William Hubbs
  1 sibling, 1 reply; 58+ messages in thread
From: Andrew Ammerlaan @ 2022-06-13  9:49 UTC (permalink / raw
  To: gentoo-dev

On 13/06/2022 09:44, Florian Schmaus wrote:
> Following the gentoo-dev@ mailing list discussion [1], this
> un-deprecates EGO_SUM.
> 
> 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa
> 
> Signed-off-by: Florian Schmaus <flow@gentoo.org>
> ---
>   eclass/go-module.eclass | 30 +++++++++++-------------------
>   1 file changed, 11 insertions(+), 19 deletions(-)
> 
> diff --git a/eclass/go-module.eclass b/eclass/go-module.eclass
> index a5dafb45cab8..2e0b7dc1726d 100644
> --- a/eclass/go-module.eclass
> +++ b/eclass/go-module.eclass
> @@ -25,7 +25,9 @@
>   #
>   # If the software has a directory named vendor in its
>   # top level directory, the only thing you need to do is inherit the
> -# eclass. If it doesn't, you need to also create a dependency tarball and
> +# eclass. If it doesn't, then you either 1) populate EGO_SUM in the ebuild and
> +# call go-module_set_globals as discussed below or 2) you need to create a
> +# dependency tarball and
>   # host it somewhere, for example in your dev space.
>   #
>   # Here is an example of how to create a dependency tarball.

I agree that EGO_SUM shouldn't have been completely deprecated following 
the previous discussion. It is not always possible or practical to host 
a dependency tarball, especially for people who aren't Gentoo 
developers. However, from what I remember from the previous discussion 
there actually were some good arguments against using EGO_SUM, mainly 
relating to the size of the ebuilds and manifests. So perhaps we can 
find some middle ground here and keep EGO_SUM for e.g. third-party 
repositories, while recommending dependency tarballs for use in 
::gentoo. Could we add something along those lines to the text here?

Best regards,
Andrew


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] [PATCH] go-module.eclass: undeprecate EGO_SUM
  2022-06-13  9:49   ` Andrew Ammerlaan
@ 2022-06-13 10:25     ` Florian Schmaus
  0 siblings, 0 replies; 58+ messages in thread
From: Florian Schmaus @ 2022-06-13 10:25 UTC (permalink / raw
  To: gentoo-dev

On 13/06/2022 11.49, Andrew Ammerlaan wrote:
> On 13/06/2022 09:44, Florian Schmaus wrote:
>> Following the gentoo-dev@ mailing list discussion [1], this
>> un-deprecates EGO_SUM.
>>
>> 1: 
>> https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa 
>>
>>
>> Signed-off-by: Florian Schmaus <flow@gentoo.org>
>> ---
>>   eclass/go-module.eclass | 30 +++++++++++-------------------
>>   1 file changed, 11 insertions(+), 19 deletions(-)
>>
>> diff --git a/eclass/go-module.eclass b/eclass/go-module.eclass
>> index a5dafb45cab8..2e0b7dc1726d 100644
>> --- a/eclass/go-module.eclass
>> +++ b/eclass/go-module.eclass
>> @@ -25,7 +25,9 @@
>>   #
>>   # If the software has a directory named vendor in its
>>   # top level directory, the only thing you need to do is inherit the
>> -# eclass. If it doesn't, you need to also create a dependency tarball 
>> and
>> +# eclass. If it doesn't, then you either 1) populate EGO_SUM in the 
>> ebuild and
>> +# call go-module_set_globals as discussed below or 2) you need to 
>> create a
>> +# dependency tarball and
>>   # host it somewhere, for example in your dev space.
>>   #
>>   # Here is an example of how to create a dependency tarball.
> 
> I agree that EGO_SUM shouldn't have been completely deprecated following 
> the previous discussion. It is not always possible or practical to host 
> a dependency tarball, especially for people who aren't Gentoo 
> developers. However, from what I remember from the previous discussion 
> there actually were some good arguments against using EGO_SUM, mainly 
> relating to the size of the ebuilds and manifests.

What's wrong with an ebuild using an EGO_SUM with a few entries in ::gentoo?

- Florian


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  9:34     ` Florian Schmaus
@ 2022-06-13 10:26       ` Ulrich Mueller
  2022-06-17 16:27         ` William Hubbs
  0 siblings, 1 reply; 58+ messages in thread
From: Ulrich Mueller @ 2022-06-13 10:26 UTC (permalink / raw
  To: Florian Schmaus; +Cc: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 897 bytes --]

>>>>> On Mon, 13 Jun 2022, Florian Schmaus wrote:

>>>> Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
>>>> where some voices where in agreement that EGO_SUM has its raison d'être,
>>>> while there where no arguments in favor of eventually removing EGO_SUM,
>>>> I hereby propose to undeprecate EGO_SUM.
>>>> 
>>>> 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa

>> Can this be done without requesting changes to package managers?

> What is 'this' here?

Undeprecating EGO_SUM.

> The patchset does not make changes to any package manager, just the
> go-module eclass.

> Note that this is not about finding about an alternative to dependency
> tarballs. It is just about re-allowing EGO_SUM in addition to
> dependency tarballs for packaging Go software in Gentoo.

OK. Thanks for the clarification.

Ulrich

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 507 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  9:30   ` Florian Schmaus
@ 2022-06-13 11:03     ` Michał Górny
  0 siblings, 0 replies; 58+ messages in thread
From: Michał Górny @ 2022-06-13 11:03 UTC (permalink / raw
  To: gentoo-dev

On Mon, 2022-06-13 at 11:30 +0200, Florian Schmaus wrote:
> On 13/06/2022 10.29, Michał Górny wrote:
> > On Mon, 2022-06-13 at 09:44 +0200, Florian Schmaus wrote:
> > > Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
> > > where some voices where in agreement that EGO_SUM has its raison d'être,
> > > while there where no arguments in favor of eventually removing EGO_SUM,
> > > I hereby propose to undeprecate EGO_SUM.
> > > 
> > > 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa
> > > 
> > 
> > "We've been rehashing the discussion until all opposition got tired
> > and stopped replying, then we claim everyone agrees".
> 
> I understand this comment so that there was already a discussion about 
> deprecating and removing EGO_SUM. I usually try to follow what's going 
> on Gentoo and I remember the discussion about introducing dependency 
> tarballs. But I apparently have missed the part where EGO_SUM was slated 
> for removal. And it appears I am not the only one, at least Ionen also 
> wrote "Missed bits and pieces but was never quite sure why this went 
> toward full deprecation, just discouraged may have been fair enough, …".
> 
> In any case, I am sorry for bringing this discussion up again. But since 
> I started rehashing this, no arguments why EGO_SUM should be removed 
> have been provided. And so far, I failed to find the old discussions 
> where I'd hope to find some rationale behind the deprecation of EGO_SUM. :/
> 

I disagree.  Robin has made a pretty complete summary in his mail, with
numbers that prove how bad EGO_SUM is/was [1].  While he may have
disagreed with dependency tarballs, he brought pretty clear arguments
how EGO_SUM is even worse.  Multiplied by all the Gentoo systems that
won't ever install 95% of Go packages, yet all have to carry their
overhead.

[1]
https://archives.gentoo.org/gentoo-dev/message/8e2a4002bfc6258d65dcf725db347cb9

-- 
Best regards,
Michał Górny



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  8:29 ` [gentoo-dev] Proposal to " Michał Górny
  2022-06-13  8:49   ` Ulrich Mueller
  2022-06-13  9:30   ` Florian Schmaus
@ 2022-06-14  9:37   ` Michał Górny
  2022-06-14 10:29     ` Florian Schmaus
  2 siblings, 1 reply; 58+ messages in thread
From: Michał Górny @ 2022-06-14  9:37 UTC (permalink / raw
  To: gentoo-dev; +Cc: William Hubbs

On Mon, 2022-06-13 at 10:29 +0200, Michał Górny wrote:
> On Mon, 2022-06-13 at 09:44 +0200, Florian Schmaus wrote:
> > Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
> > where some voices where in agreement that EGO_SUM has its raison d'être,
> > while there where no arguments in favor of eventually removing EGO_SUM,
> > I hereby propose to undeprecate EGO_SUM.
> > 
> > 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa
> > 
> 
> "We've been rehashing the discussion until all opposition got tired
> and stopped replying, then we claim everyone agrees".

First of all, I am sorry for my tone.

I have been thinking about it and I was wrong to oppose this change.
I have been conflating two problems: EGO_SUM and Manifest sizes. 
However, while EGO_SUM might be an important factor contributing to
the latter, I think we shouldn't single it out and instead focus
on addressing the actual problem.

That said, I believe it's within maintainer's right to decide what API
to deprecate and what API to support.  So I'd suggest getting William's
approval for this rather than changing the supported API of that eclass
via drive-by commits.

-- 
Best regards,
Michał Górny



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-14  9:37   ` Michał Górny
@ 2022-06-14 10:29     ` Florian Schmaus
  2022-06-14 16:33       ` [gentoo-dev] " Holger Hoffstätte
  0 siblings, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-06-14 10:29 UTC (permalink / raw
  To: gentoo-dev

On 14/06/2022 11.37, Michał Górny wrote:
> On Mon, 2022-06-13 at 10:29 +0200, Michał Górny wrote:
>> On Mon, 2022-06-13 at 09:44 +0200, Florian Schmaus wrote:
>>> Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
>>> where some voices where in agreement that EGO_SUM has its raison d'être,
>>> while there where no arguments in favor of eventually removing EGO_SUM,
>>> I hereby propose to undeprecate EGO_SUM.
>>>
>>> 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa
>>>
>>
>> "We've been rehashing the discussion until all opposition got tired
>> and stopped replying, then we claim everyone agrees".
> 
> First of all, I am sorry for my tone.

No worries and no offense taken. I can easily see how this could be 
considered rehashing an old discussion, but the truth is simply that the 
deprecation of EGO_SUM caught me by surprise.


> I have been thinking about it and I was wrong to oppose this change.
> I have been conflating two problem: EGO_SUM and Manifest sizes.
> However, while EGO_SUM might be an important factor contributing to
> the latter, I think we shouldn't single it out and instead focus
> on addressing the actual problem.

Exactly my line of thought. Especially since it is not unlikely that we 
will run into this problem with other programming language ecosystems 
too (where the "dependency tarball" solution may not be easily viable).


> That said, I believe it's within maintainer's right to decide what API
> to deprecate and what API to support.  So I'd suggest getting William's
> approval for this rather than changing the supported API of that eclass
> via drive-by commits.

That was never my intention, hence the subject starts with "Proposal to" 
and I explicitly put William in CC. I believed that one week after the 
discussion around my initial gentoo-dev@ post, which gave me the 
impression that un-deprecating EGO_SUM has some supporters and no 
opposer, it was time to post a concrete proposal in form of a suggested 
code change.

Looking forward to William's take on this. :)

- Flow


^ permalink raw reply	[flat|nested] 58+ messages in thread

* [gentoo-dev] Re: Proposal to undeprecate EGO_SUM
  2022-06-14 10:29     ` Florian Schmaus
@ 2022-06-14 16:33       ` Holger Hoffstätte
  2022-06-14 17:03         ` Florian Schmaus
  0 siblings, 1 reply; 58+ messages in thread
From: Holger Hoffstätte @ 2022-06-14 16:33 UTC (permalink / raw
  To: gentoo-dev, Florian Schmaus

(I hope this makes it to the -dev list)

Hello everyone -

I'm not an official dev but frequently report bugs, fixes and also
maintain a few go-based ebuilds in my private overlay. I also hate
golang with the force of a thousand suns, but that's not important
right now.

Since I recently converted all my ebuilds from EGO_SUM to the
tarball way of doing things I'd like to chime in.
Also I'm not going to rehash everything that has been said, except
maybe that the space usage of the tarballs is nothing short of *insane*.

OTOH having to paste a weird list of dependencies into the
ebuild is also insane, even though get-ego-vendor makes this
palatable. With an eye towards fixing *that* with a bit more
automation, let's look at the pieces of the puzzle.

The candidate on the table: the ebuild for restic, a popular
and pretty clever backup program.

The restic ebuild by itself is ~40k:
$cd /var/db/repos/gentoo/app-backup/restic
$ls -al restic-0.13.1.ebuild
-rw-r--r-- 1 root root 40699 Apr 23 13:11 restic-0.13.1.ebuild

If we separate the ebuild from the EGO_SUM blurb, we get:
$ls -al restic-0.13.1*
-rw-r--r-- 1 holger users 39668 Jun 14 17:50 restic-0.13.1-EGO_SUM
-rw-r--r-- 1 holger users  1030 Jun 14 17:51 restic-0.13.1.ebuild

Nothing new here. But how large is the EGO_SUM really?
$ls -al restic-0.13.1-EGO_SUM.bz2
-rw-r--r-- 1 holger users 7902 Jun 14 17:50 restic-0.13.1-EGO_SUM.bz2

Much smaller obviously, but probably still too large for including in
$FILESDIR. So my idea here is: instead of chucking EGO_SUM (automatically
generated declarative dependency management) out the window, can we not
separate the two and instead of uploading the tarball upload the
dependency set instead? This does not fix the mentioned trust problem
since a dev space can still be hijacked, but that is already the case.
Anyway.

The only new requirement here would be to load/parse the EGO_SUM.bz2 into
the ebuild, but I'm sure that can be solved. Note that only the SHA of
the EGO_SUM.bz2 would be verified as dependency, not all the
contents - same as with the tarball.

This would eliminate the space bloat/bandwidth amplification problem,
distfile caching across ebuilds could again work as expected (even though
go successfully makes that almost futile), and with slightly better
tooling in get-ego-vendor could reduce toil when bumping an ebuild.

I'm looking forward to hearing why this idea is terrible. :)

Thank you all for Gentoo.

cheers
Holger



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Re: Proposal to undeprecate EGO_SUM
  2022-06-14 16:33       ` [gentoo-dev] " Holger Hoffstätte
@ 2022-06-14 17:03         ` Florian Schmaus
  2022-06-15  5:53           ` Michał Górny
  0 siblings, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-06-14 17:03 UTC (permalink / raw
  To: Holger Hoffstätte; +Cc: gentoo-dev

On 14.06.22 18:33, Holger Hoffstätte wrote:
> So my idea here is: instead of chucking EGO_SUM (automatically
> generated declarative dependency management) out the window, can we not
> separate the two and instead of uploading the tarball upload the
> dependency set instead?
I think that this idea has been pitched already (see for example 
Robin's post [1]), although in a broader non-Go-specific sense and it is 
one obvious way to move forward.

One obstacle, and probably the largest, is that this cannot be 
implemented in an eclass alone. Due to the sandboxing during the build 
process, fetching distfiles, which is what we are talking about, is the 
package manager's job and hence, I believe, this would require adjustments 
to the package manager and package manager specification (PMS).

The basic idea, at least to my understanding (or how I would propose 
it), is to have a new top-level ebuild variable

SRC_URI_FILE="https://example.org/manifests/restic-0.13.1.files"

where restic-0.13.1.files contains lines like

<SRC_URI> <SIZE> <HASH> [<TARGET_FILENAME>]

which is, as you nicely demonstrated on the restic ebuild, where the 
bytes contributing to the ebuild size bloat originate from.

Those bytes are now outsourced from ::gentoo, can be fetched on-demand, 
allowing the package manager to download the individual distfiles into 
DISTDIR, where, e.g., the go eclass can process them further within 
the constraints of the security sandbox.

- Flow


1: 
https://archives.gentoo.org/gentoo-dev/message/8e2a4002bfc6258d65dcf725db347cb9


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  7:44 [gentoo-dev] Proposal to undeprecate EGO_SUM Florian Schmaus
  2022-06-13  7:44 ` [gentoo-dev] [PATCH] go-module.eclass: " Florian Schmaus
  2022-06-13  8:29 ` [gentoo-dev] Proposal to " Michał Górny
@ 2022-06-14 17:34 ` Arsen Arsenović
  2022-06-26 23:43 ` Zoltan Puskas
  2022-09-28 15:28 ` Florian Schmaus
  4 siblings, 0 replies; 58+ messages in thread
From: Arsen Arsenović @ 2022-06-14 17:34 UTC (permalink / raw
  To: gentoo-dev; +Cc: William Hubbs, Florian Schmaus

[-- Attachment #1: Type: text/plain, Size: 1787 bytes --]

(replying to the first post here as I believe this post is relevant to 
most, if not all, subthreads)

I've prepared a PoC of an automated solution for vendoring[1] a while 
back (around the start of this whole discussion) that would place trust 
on the infrastructure (though potentially verifiable).

My concept provides two solutions:
1) go mod vendor - not verifiable by users (as vendor tars don't include       
enough information for checksumming - see also [2])
2) modcache - significantly larger but verifiable on the client (against 
existing go.sum). These archives really go up to gigabytes in size as 
opposed to a few megs of vendored tarballs.

Please note that [1] is on a small server, possibly broken, pretty slow, 
and not fit for production yet. Ping me on IRC if you encounter issues 
so that I can "unjam" it.

Also note that this thing doesn't attempt much to figure out how to 
convert a ${PV} or any other format versions, and essentially leaves 
that up to the GOPROXY (with very little extra work, see: [3]).

The proposed solution here is that the developer passes something like 
https://go.gentoo.org/vendor/...@${PV} -> vendor.tar into $SRC_URI, 
which would get initiated with a call to ``pkgdev manifest'' or such 
(possibly authenticated via IP or keys or something, to prevent abuse), 
and be done with it.

The biggest downside I've seen so far (excluding further developing the 
solution) is that some Go programs don't respect the restrictions of the 
Go module system, and thus fail to fetch.

[1]: https://vengor.aarsen.me/
[2]: https://github.com/golang/go/issues/27348
[3]: https://git.sr.ht/~arsen/vengor/tree/ab1ae7b275ab492d4806de88cfbf67e7b97c1ade/item/vengor/__init__.py#L101-127

-- 
Arsen Arsenović

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 358 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Re: Proposal to undeprecate EGO_SUM
  2022-06-14 17:03         ` Florian Schmaus
@ 2022-06-15  5:53           ` Michał Górny
  2022-06-17 19:04             ` Michał Górny
  0 siblings, 1 reply; 58+ messages in thread
From: Michał Górny @ 2022-06-15  5:53 UTC (permalink / raw
  To: gentoo-dev, Holger Hoffstätte

On Tue, 2022-06-14 at 19:03 +0200, Florian Schmaus wrote:
> On 14.06.22 18:33, Holger Hoffstätte wrote:
> > So my idea here is: instead of chucking EGO_SUM (automatically
> > generated declarative dependency management) out the window, can we not
> > separate the two and instead of uploading the tarball upload the
> > dependency set instead?
> I think that this idea that has been pitched already (see for example 
> Robin's post [1]), although in a broader non-Go-specific sense and it is 
> one obvious way to move forward.
> 
> An, and probably the largest, obstacle is that this can not be 
> implemented in an eclass alone. Due to the sandboxing during the build 
> process, fetching distfiles, which is what we are talking about, is the 
> package manager's job and hence, I believe, this would require adjustments 
> to the package manager and package manager specification (PMS).
> 
> The basic idea, at least to my understanding (or how I would propose 
> it), is to have a new top-level ebuild variable
> 
> SRC_URI_FILE="https://example.org/manifests/restic-0.13.1.files"
> 
> where restic-0.13.1.files contains lines like
> 
> <SRC_URI> <SIZE> <HASH> [<TARGET_FILENAME>]
> 
> which is, as you nicely demonstrated on the restic ebuild, where the 
> bytes contributing to the ebuild size bloat originate from.
> 
> Those bytes are now outsourced from ::gentoo, can be fetched on-demand, 
> allowing the package manager to download the individual distfiles into 
> DISTDIR, where, e.g., the go eclass can process them further within 
> the constraints of the security sandbox.
> 

Anything that involves breaking the Portage plan-depgraph / fetch&build
separately would require major architectural changes, so can be rejected
immediately as "not going to be implemented in our lifetimes".

-- 
Best regards,
Michał Górny



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] [PATCH] go-module.eclass: undeprecate EGO_SUM
  2022-06-13  7:44 ` [gentoo-dev] [PATCH] go-module.eclass: " Florian Schmaus
  2022-06-13  9:49   ` Andrew Ammerlaan
@ 2022-06-17 15:53   ` William Hubbs
  1 sibling, 0 replies; 58+ messages in thread
From: William Hubbs @ 2022-06-17 15:53 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 444 bytes --]

On Mon, Jun 13, 2022 at 09:44:11AM +0200, Florian Schmaus wrote:
> Following the gentoo-dev@ mailing list discussion [1], this
> un-deprecates EGO_SUM.

Sorry I haven't been on this list in a while (I've been pretty busy
with work).

Did you talk to the pms team about https://bugs.gentoo.org/833567?

I knew about Robin's proposal, but there isn't a working version of that
yet, and I've seen packages break portage using EGO_SUM.

William

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13 10:26       ` Ulrich Mueller
@ 2022-06-17 16:27         ` William Hubbs
  2022-10-12 13:01           ` Florian Schmaus
  0 siblings, 1 reply; 58+ messages in thread
From: William Hubbs @ 2022-06-17 16:27 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 1503 bytes --]

On Mon, Jun 13, 2022 at 12:26:43PM +0200, Ulrich Mueller wrote:
> >>>>> On Mon, 13 Jun 2022, Florian Schmaus wrote:
> 
> >>>> Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
> >>>> where some voices were in agreement that EGO_SUM has its raison d'être,
> >>>> while there were no arguments in favor of eventually removing EGO_SUM,
> >>>> I hereby propose to undeprecate EGO_SUM.
> >>>> 
> >>>> 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa
> 
> >> Can this be done without requesting changes to package managers?
> 
> > What is 'this' here?
> 
> Undeprecating EGO_SUM.
> 
> > The patchset does not make changes to any package manager, just the
> > go-module eclass.
> 
> > Note that this is not about finding an alternative to dependency
> > tarballs. It is just about re-allowing EGO_SUM in addition to
> > dependency tarballs for packaging Go software in Gentoo.

Like I said in my earlier reply, there have been packages that break
using EGO_SUM. Also, Robin's proposal will not be happening for some time,
if it happens at all, since it will require an EAPI bump and doesn't have a
working implementation.

The most pressing concern about EGO_SUM is that it can make portage
crash because of the size of SRC_URI, so it definitely should not be
preferred over dependency tarballs.

If you want to chat more about this on the list we can, but for now,
let's not undeprecate EGO_SUM in the eclass.

William

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Re: Proposal to undeprecate EGO_SUM
  2022-06-15  5:53           ` Michał Górny
@ 2022-06-17 19:04             ` Michał Górny
  0 siblings, 0 replies; 58+ messages in thread
From: Michał Górny @ 2022-06-17 19:04 UTC (permalink / raw
  To: gentoo-dev, Holger Hoffstätte

On Wed, 2022-06-15 at 07:53 +0200, Michał Górny wrote:
> On Tue, 2022-06-14 at 19:03 +0200, Florian Schmaus wrote:
> > On 14.06.22 18:33, Holger Hoffstätte wrote:
> > > So my idea here is: instead of chucking EGO_SUM (automatically
> > > generated declarative dependency management) out the window, can we not
> > > separate the two and instead of uploading the tarball upload the
> > > dependency set instead?
> > I think that this idea that has been pitched already (see for example 
> > Robin's post [1]), although in a broader non-Go-specific sense and it is 
> > one obvious way to move forward.
> > 
> > An, and probably the largest, obstacle is that this can not be 
> > implemented in an eclass alone. Due to the sandboxing during the build 
> > process, fetching distfiles, which is what we are talking about, is the 
> > package manager's job and hence, I believe, this would require adjustments 
> > to the package manager and package manager specification (PMS).
> > 
> > The basic idea, at least to my understanding (or how I would propose 
> > it), is to have a new top-level ebuild variable
> > 
> > SRC_URI_FILE="https://example.org/manifests/restic-0.13.1.files"
> > 
> > where restic-0.13.1.files contains lines like
> > 
> > <SRC_URI> <SIZE> <HASH> [<TARGET_FILENAME>]
> > 
> > which is, as you nicely demonstrated on the restic ebuild, where the 
> > bytes contributing to the ebuild size bloat originate from.
> > 
> > Those bytes are now outsourced from ::gentoo, can be fetched on-demand, 
> > allowing the package manager to download the individual distfiles into 
> > DISTDIR, where, e.g., the go eclass can process them further within 
> > the constraints of the security sandbox.
> > 
> 
> Anything that involves breaking the Portage plan-depgraph / fetch&build
> separately would require major architectural changes, so can be rejected
> immediately as "not going to be implemented in our lifetimes".
> 

Just to be clear, I'm not against this proposal.  In fact, I think it's
probably the best solution that's been proposed so far.  What I wanted
to point out is that we probably don't have anyone who would actually
implement that.

-- 
Best regards,
Michał Górny



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  7:44 [gentoo-dev] Proposal to undeprecate EGO_SUM Florian Schmaus
                   ` (2 preceding siblings ...)
  2022-06-14 17:34 ` [gentoo-dev] " Arsen Arsenović
@ 2022-06-26 23:43 ` Zoltan Puskas
  2022-06-27  6:09   ` Oskari Pirhonen
  2022-07-15 21:34   ` William Hubbs
  2022-09-28 15:28 ` Florian Schmaus
  4 siblings, 2 replies; 58+ messages in thread
From: Zoltan Puskas @ 2022-06-26 23:43 UTC (permalink / raw
  To: gentoo-dev

Hi,

I've been working on adding a go based ebuild to Gentoo yesterday and I 
got this warning from portage saying that EGO_SUM is deprecated and 
should be avoided. Since I remember there was an intense discussion 
about this on the ML I went back and have re-read the threads before 
writing this piece. I'd like to provide my perspective as user, a 
proxied maintainer, and overlay owner. I also run a private mirror on my 
LAN to serve my hosts in order to reduce load on external mirrors.

Before diving in I think it's worth reading mgorny's blog post "The 
modern packager’s security nightmare"[1] as it's relevant to the 
discussion, and something I deeply agree with.

With all that being said, I feel that the tarball idea is a bad one for 
many reasons.

 From a security point of view, I understand that we still have to trust 
maintainers not to do funky stuff, but I think this issue goes beyond 
that.

First of all one of the advantages of Gentoo is that it gets its source 
code from upstream (yes, I'm aware of mirrors acting as a cache layer), 
which means that poisoning source code needs to be done at upstream 
level (effectively means hacking GitHub, PyPi, or some standalone 
project's Gitea/cgit/gitlab/etc. instance or similar), sources which 
either have more scrutiny or have a limited blast radius.

Additionally if an upstream dependency has a security issue it's easier 
to scan all EGO_SUM content and find packages that potentially depend on 
a broken dependency and force a re-pinning and rebuild. The tarball 
magic hides this completely and makes searching very expensive.

In fact using these vendor tarballs is the equivalent of "static 
linking" in the packaging space. Why are we introducing the same issue 
in the repository space? This kills the reusability of already 
downloaded dependencies and bloats storage requirements. This is 
especially bad on laptops, where SSD free space might be limited, in 
case the user does not nuke their distfiles after each upgrade.

Considering that BTRFS (and possibly other filesystems) support on the 
fly compression the physical cost of a few inflated ebuilds and 
Manifests is actually way smaller than the logical size would indicate. 
Compare that to the huge incompressible tarballs that now we need to 
store.

As a proxied maintainer or overlay owner hosting these huge tarballs 
also becomes a problem (i.e. we need some public space with potentially 
gigabytes of free space and enough bandwidth to push that to users). 
Pushing toward vendor tarballs creates an extra expense on every level 
(Gentoo infra, mirrors, proxy maintainers, overlay owners, users).

If bloating portage is a big issue and we frown upon go stuff anyway (or 
only a few users need these packages), why not consider moving all go 
packages into an officially supported go packages only overlay? I 
understand that this would not solve the kernel buffer issue where we 
run out of environment variable space, but it would debloat the main 
portage tree.

It also breaks reproducibility. With EGO_SUM I can check out an older 
version of portage tree (well to some extent) and rebuild packages since 
dependency upstream is very likely to host old versions of their source. 
With the tarballs this breaks since as soon as an ebuild is dropped from 
mainline portage the vendor tarballs follow them too. There is no way 
for the user to roll back a package a few weeks back (e.g. if new 
version has bugs), unlike with EGO_SUM.

In fact I feel this goes against the spirit of portage too, since 
instead of "just describing" how to obtain sources and build them, 
it now depends on essentially ephemeral blobs, which happen to be 
externalized from the portage tree itself. I'm aware that we have 
ebuilds that pull in patches and other stuff from dev space already, but 
we shouldn't make this even worse.

Finally with EGO_SUM we had a nice tool get-ego-vendor which produced 
the EGO_SUM for maintainers which has made maintenance easier. However I 
haven't found any new guidance yet on how to maintain go packages with 
the new tarball method (e.g. what needs to go into the vendor tarball, 
what changes are needed in ebuilds). Overall this complicates further 
ebuild development and verification of PRs.

In summary, IMHO the EGO_SUM way of handling of go packages has more 
benefits than drawbacks compared to the vendor tarballs.

Cheers,
Zoltan

[1] 
https://blogs.gentoo.org/mgorny/2021/02/19/the-modern-packagers-security-nightmare/


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-26 23:43 ` Zoltan Puskas
@ 2022-06-27  6:09   ` Oskari Pirhonen
  2022-06-27  7:14     ` Zoltan Puskas
  2022-07-15 21:34   ` William Hubbs
  1 sibling, 1 reply; 58+ messages in thread
From: Oskari Pirhonen @ 2022-06-27  6:09 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 5879 bytes --]

On Mon, Jun 27, 2022 at 01:43:19 +0200, Zoltan Puskas wrote:
> Hi,
> 
> I've been working on adding a go based ebuild to Gentoo yesterday and I 
> got this warning from portage saying that EGO_SUM is deprecated and 
> should be avoided. Since I remember there was an intense discussion 
> about this on the ML I went back and have re-read the threads before 
> writing this piece. I'd like to provide my perspective as user, a 
> proxied maintainer, and overlay owner. I also run a private mirror on my 
> LAN to serve my hosts in order to reduce load on external mirrors.
> 
> Before diving in I think it's worth reading mgorny's blog post "The 
> modern packager’s security nightmare"[1] as it's relevant to the 
> discussion, and something I deeply agree with.
> 
> With all that being said, I feel that the tarball idea is a bad one for 
> many reasons.
> 
>  From a security point of view, I understand that we still have to trust 
> maintainers not to do funky stuff, but I think this issue goes beyond 
> that.
> 
> First of all one of the advantages of Gentoo is that it gets its source 
> code from upstream (yes, I'm aware of mirrors acting as a cache layer), 
> which means that poisoning source code needs to be done at upstream 
> level (effectively means hacking GitHub, PyPi, or some standalone 
> project's Gitea/cgit/gitlab/etc. instance or similar), sources which 
> either have more scrutiny or have a limited blast radius.
> 
> Additionally if an upstream dependency has a security issue it's easier 
> to scan all EGO_SUM content and find packages that potentially depend on 
> a broken dependency and force a re-pinning and rebuild. The tarball 
> magic hides this completely and makes searching very expensive.
> 
> In fact using these vendor tarballs is the equivalent of "static 
> linking" in the packaging space. Why are we introducing the same issue 
> in the repository space? This kills the reusability of already 
> downloaded dependencies and bloats storage requirements. This is 
> especially bad on laptops, where SSD free space might be limited, in 
> case the user does not nuke their distfiles after each upgrade.
> 
> Considering that BTRFS (and possibly other filesystems) support on the 
> fly compression the physical cost of a few inflated ebuilds and 
> Manifests is actually way smaller than the logical size would indicate. 
> Compare that to the huge incompressible tarballs that now we need to 
> store.
> 
> As a proxied maintainer or overlay owner hosting these huge tarballs 
> also becomes a problem (i.e. we need some public space with potentially 
> gigabytes of free space and enough bandwidth to push that to users). 
> Pushing toward vendor tarballs creates an extra expense on every level 
> (Gentoo infra, mirrors, proxy maintainers, overlay owners, users).
> 
> If bloating portage is a big issue and we frown upon go stuff anyway (or 
> only a few users need these packages), why not consider moving all go 
> packages into an officially supported go packages only overlay? I 
> understand that this would not solve the kernel buffer issue where we 
> run out of environment variable space, but it would debloat the main 
> portage tree.
> 

Rephrasing this just to ensure I'm understanding it correctly: you're
suggesting to move _everything_ that uses Go into its own overlay. Let's
call it gentoo-go for the sake of the example.

If the above is accurate, then I hard disagree.

The biggest package that I have that uses Go is docker (and accompanying
tools). Personal distaste of docker aside, it's a very popular piece of
software, and I don't think it's fair to require all the people who want
to use it to first enable and sync gentoo-go before they can install it.

And what about transitive dependencies? Suppose app-misc/cool-package is
written in some language that isn't Go, but it has a dependency on
sys-apps/cool-util which has a dependency on something written in Go.
Should a user wanting to install cool-package have to enable the
gentoo-go overlay now too? Even though app-misc/cool-package would look
like it doesn't need the overlay unless you dig into the deps.

Not a dev, just a user who really likes Gentoo :)

- Oskari

> It also breaks reproducibility. With EGO_SUM I can check out an older 
> version of portage tree (well to some extent) and rebuild packages since 
> dependency upstream is very likely to host old versions of their source. 
> With the tarballs this breaks since as soon as an ebuild is dropped from 
> mainline portage the vendor tarballs follow them too. There is no way 
> for the user to roll back a package a few weeks back (e.g. if new 
> version has bugs), unlike with EGO_SUM.
> 
> In fact I feel this goes against the spirit of portage too, since 
> instead of "just describing" how to obtain sources and build them, 
> it now depends on essentially ephemeral blobs, which happen to be 
> externalized from the portage tree itself. I'm aware that we have 
> ebuilds that pull in patches and other stuff from dev space already, but 
> we shouldn't make this even worse.
> 
> Finally with EGO_SUM we had a nice tool get-ego-vendor which produced 
> the EGO_SUM for maintainers which has made maintenance easier. However I 
> haven't found any new guidance yet on how to maintain go packages with 
> the new tarball method (e.g. what needs to go into the vendor tarball, 
> what changes are needed in ebuilds). Overall this complicates further 
> ebuild development and verification of PRs.
> 
> In summary, IMHO the EGO_SUM way of handling of go packages has more 
> benefits than drawbacks compared to the vendor tarballs.
> 
> Cheers,
> Zoltan
> 
> [1] 
> https://blogs.gentoo.org/mgorny/2021/02/19/the-modern-packagers-security-nightmare/
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-27  6:09   ` Oskari Pirhonen
@ 2022-06-27  7:14     ` Zoltan Puskas
  0 siblings, 0 replies; 58+ messages in thread
From: Zoltan Puskas @ 2022-06-27  7:14 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 1874 bytes --]

Hey,

>
> Rephrasing this just to ensure I'm understanding it correctly: you're
> suggesting to move _everything_ that uses Go into its own overlay. Let's
> call it gentoo-go for the sake of the example.
>
> If the above is accurate, then I hard disagree.

Yes, that was the suggestion, you understood it correctly.

>
> The biggest package that I have that uses Go is docker (and accompanying
> tools). Personal distaste of docker aside, it's a very popular piece of
> software, and I don't think it's fair to require all the people who want
> to use it to first enable and sync gentoo-go before they can install it.

It could be enabled by default for everyone, and people would have the choice to
disable it or mask everything except what they are using in that case, so the
extra user toil could be avoided by a careful rollout. I'm not saying it would
be an elegant solution though.

>
> And what about transitive dependencies? Suppose app-misc/cool-package is
> written in some language that isn't Go, but it has a dependency on
> sys-apps/cool-util which has a dependency on something written in Go.
> Should a user wanting to install cool-package have to enable the
> gentoo-go overlay now too? Even though app-misc/cool-package would look
> like it doesn't need the overlay unless you dig into the deps.

This is however a valid point, something I did not consider.

Any reverse dependencies (i.e. packages in main portage tree depending on
gentoo-go) would be antithetical to the overlay philosophy (the other direction of
dependencies is okay though). This invalidates my separate overlay
suggestion, consider it withdrawn.

However I think that my other points still stand, until someone convinces
me otherwise.

>
> Not a dev, just a user who really likes Gentoo :)

Thanks for your perspective, it was a valuable observation. :)

>
> - Oskari
>

Cheers,
Zoltan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-26 23:43 ` Zoltan Puskas
  2022-06-27  6:09   ` Oskari Pirhonen
@ 2022-07-15 21:34   ` William Hubbs
  2022-07-16 11:24     ` Florian Schmaus
  1 sibling, 1 reply; 58+ messages in thread
From: William Hubbs @ 2022-07-15 21:34 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 4438 bytes --]

On Mon, Jun 27, 2022 at 01:43:19AM +0200, Zoltan Puskas wrote:

*snip*

> First of all one of the advantages of Gentoo is that it gets its source 
> code from upstream (yes, I'm aware of mirrors acting as a cache layer), 
> which means that poisoning source code needs to be done at upstream 
> level (effectively means hacking GitHub, PyPi, or some standalone 
> project's Gitea/cgit/gitlab/etc. instance or similar), sources which 
> either have more scrutiny or have a limited blast radius.

I don't quite follow what you mean.
Upstream for go modules is actually proxy.golang.org, or some other
similar proxy, which the go tooling knows how to access [1].

> Additionally if an upstream dependency has a security issue it's easier 
> to scan all EGO_SUM content and find packages that potentially depend on 
> a broken dependency and force a re-pinning and rebuild. The tarball 
> magic hides this completely and makes searching very expensive.

I'm not comfortable at all with us changing the dependencies like this
downstream for the same reason the Debian folks ultimately were against
it for kubernetes. If you make these kinds of changes you are effectively
creating a fork, and that would mean we would be building packages with untested
libraries [2].

*snip*

> Considering that BTRFS (and possibly other filesystems) support on the 
> fly compression the physical cost of a few inflated ebuilds and 

The problem here is the size of SRC_URI when you add the EGO_SUM_SRC_URI
to it. SRC_URI gets exported to the environment, so it can crash portage
if it is too big.

> Manifests is actually way smaller than the logical size would indicate. 
> Compare that to the huge incompressible tarballs that now we need to 
> store.
> 
> As a proxied maintainer or overlay owner hosting these huge tarballs 
> also becomes problem (i.e. we need some public space with potentially 
> gigabytes of free space and enough bandwidth to push that to users). 
> Pushing toward vendor tarballs creates an extra expense on every level 
> (Gentoo infra, mirrors, proxy maintainers, overlay owners, users).

I agree that creating the dependency tarballs is not ideal. We asked for
another option [3], but as you can see from the bug this was refused by
the PMS team. That refusal is the only reason we have to worry about
dependency tarballs.

> It also breaks reproducibility. With EGO_SUM I can check out an older 
> version of portage tree (well to some extent) and rebuild packages since 
> dependency upstream is very likely to host old versions of their source. 
> With the tarballs this breaks since as soon as an ebuild is dropped from 
> mainline portage the vendor tarballs follow them too. There is no way 
> for the user to roll back a package a few weeks back (e.g. if new 
> version has bugs), unlike with EGO_SUM.

The contents of a dependency tarball are created using "go mod download",
which is controlled by the go.mod/go.sum files in the package. So, it is
possible to recreate the dependency tarball any time.

I do not see any advantage EGO_SUM offers over the dependency tarballs
in this space.

> Finally with EGO_SUM we had a nice tool get-ego-vendor which produced 
> the EGO_SUM for maintainers which has made maintenance easier. However I 
> haven't found any new guidance yet on how to maintain go packages with 
> the new tarball method (e.g. what needs to go into the vendor tarball, 
> what changes are needed in ebuilds). Overall this complicates further 
> ebuild development and verification of PRs.

The documentation for how to build dependency tarballs is in the eclass.
The GOMODCACHE environment variable is used in the eclass to point to
the location where the dependency tarball is unpacked, and that location
is read by the normal go tooling.

> In summary, IMHO the EGO_SUM way of handling of go packages has more 
> benefits than drawbacks compared to the vendor tarballs.

EGO_SUM can cause portage to break; that is the primary reason support
is going away.

We attempted another solution that was refused, so the only option we
have currently is to build the dependency tarballs.

> 
> Cheers,
> Zoltan
> 
> [1] 
> https://blogs.gentoo.org/mgorny/2021/02/19/the-modern-packagers-security-nightmare/
> 

[1] https://go.dev/ref/mod
[2] https://lwn.net/Articles/835599/
[3] https://bugs.gentoo.org/833567

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-07-15 21:34   ` William Hubbs
@ 2022-07-16 11:24     ` Florian Schmaus
  2022-07-16 11:58       ` Joonas Niilola
  0 siblings, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-07-16 11:24 UTC (permalink / raw
  To: gentoo-dev; +Cc: William Hubbs

On 15/07/2022 23.34, William Hubbs wrote:
> On Mon, Jun 27, 2022 at 01:43:19AM +0200, Zoltan Puskas wrote:
>> In summary, IMHO the EGO_SUM way of handling of go packages has more
>> benefits than drawbacks compared to the vendor tarballs.
> 
> EGO_SUM can cause portage to break; that is the primary reason support
> is going away.
> 
> We attempted another solution that was refused, so the only option we
> have currently is to build the dependency tarballs.

That reads as if you wrote it under the assumption that we can only 
either use dependency tarballs or use EGO_SUM. At the same time, I have 
not seen an argument why we can not simply do *both*.

EGO_SUM has numerous advantages over dependency tarballs, but can not be 
used if the size of the EGO_SUM value crosses a threshold. So why not 
mandate dependency tarballs if a point is crossed and otherwise allow 
EGO_SUM? That way, we could have the best of both worlds.

- Flow






^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-07-16 11:24     ` Florian Schmaus
@ 2022-07-16 11:58       ` Joonas Niilola
  2022-07-16 17:51         ` William Hubbs
  0 siblings, 1 reply; 58+ messages in thread
From: Joonas Niilola @ 2022-07-16 11:58 UTC (permalink / raw
  To: gentoo-dev


[-- Attachment #1.1: Type: text/plain, Size: 652 bytes --]

On 16.7.2022 14.24, Florian Schmaus wrote:
> 
> That reads as if you wrote it under the assumption that we can only
> either use dependency tarballs or use EGO_SUM. At the same time, I have
> not seen an argument why we can not simply do *both*.
> 
> EGO_SUM has numerous advantages over dependency tarballs, but can not be
> used if the size of the EGO_SUM value crosses a threshold. So why not
> mandate dependency tarballs if a point is crossed and otherwise allow
> EGO_SUM? That way, we could have the best of both worlds.
> 
> - Flow
> 

++ this sounds most sensible. This is also how I've understood your
proposal.

-- juippis

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 618 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-07-16 11:58       ` Joonas Niilola
@ 2022-07-16 17:51         ` William Hubbs
  2022-07-16 18:31           ` Arthur Zamarin
  0 siblings, 1 reply; 58+ messages in thread
From: William Hubbs @ 2022-07-16 17:51 UTC (permalink / raw
  To: gentoo-dev; +Cc: mgorny

[-- Attachment #1: Type: text/plain, Size: 1025 bytes --]

On Sat, Jul 16, 2022 at 02:58:04PM +0300, Joonas Niilola wrote:
> On 16.7.2022 14.24, Florian Schmaus wrote:
> > 
> > That reads as if you wrote it under the assumption that we can only
> > either use dependency tarballs or use EGO_SUM. At the same time, I have
> > not seen an argument why we can not simply do *both*.
> > 
> > EGO_SUM has numerous advantages over dependency tarballs, but can not be
> > used if the size of the EGO_SUM value crosses a threshold. So why not
> > mandate dependency tarballs if a point is crossed and otherwise allow
> > EGO_SUM? That way, we could have the best of both worlds.
> > 
> > - Flow
> > 
> 
> ++ this sounds most sensible. This is also how I've understood your
> proposal.

Remember that with EGO_SUM all of the bloated manifests and ebuilds are
on every user's system.

I added mgorny as a cc to this message because he made it pretty clear
at some point in the previous discussion that the size of these ebuilds
and manifests is unacceptable.

William

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-07-16 17:51         ` William Hubbs
@ 2022-07-16 18:31           ` Arthur Zamarin
  2022-07-16 18:46             ` Robin H. Johnson
  0 siblings, 1 reply; 58+ messages in thread
From: Arthur Zamarin @ 2022-07-16 18:31 UTC (permalink / raw
  To: gentoo-dev, mgorny


[-- Attachment #1.1: Type: text/plain, Size: 970 bytes --]

On 16/07/2022 20.51, William Hubbs wrote:
> On Sat, Jul 16, 2022 at 02:58:04PM +0300, Joonas Niilola wrote:
>> On 16.7.2022 14.24, Florian Schmaus wrote:
>>>
>>
>> ++ this sounds most sensible. This is also how I've understood your
>> proposal.
> 
> Remember that with EGO_SUM all of the bloated manifests and ebuilds are
> on every user's system.
> 
> I added mgorny as a cc to this message because he made it pretty clear
> at some point in the previous discussion that the size of these ebuilds
> and manifests is unacceptable.
> 
> William

I want to give another option. Both ways are allowed by eclass, but by
QA policy (or some other decision), it is prohibited to use EGO_SUM in
main ::gentoo tree.

As a result, overlays and ::guru can use the EGO_SUM or dist distfile
(remember, they don't have access to hosting on dev.g.o).

-- 
Arthur Zamarin
arthurzam@gentoo.org
Gentoo Linux developer (Python, Arch Teams, pkgcore stack, GURU)

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-07-16 18:31           ` Arthur Zamarin
@ 2022-07-16 18:46             ` Robin H. Johnson
  2022-07-16 19:35               ` William Hubbs
  0 siblings, 1 reply; 58+ messages in thread
From: Robin H. Johnson @ 2022-07-16 18:46 UTC (permalink / raw
  To: gentoo-dev; +Cc: mgorny

[-- Attachment #1: Type: text/plain, Size: 815 bytes --]

On Sat, Jul 16, 2022 at 09:31:35PM +0300, Arthur Zamarin wrote:
> I want to give another option. Both ways are allowed by eclass, but by
> QA policy (or some other decision), it is prohibited to use EGO_SUM in
> main ::gentoo tree.
> 
> As a result, overlays and ::guru can use the EGO_SUM or dist distfile
> (remember, they don't have access to hosting on dev.g.o).
Yes; this is the option I was trying to propose as an intermediate step
until we have indirect Manifests that provide the best of both worlds
(not bloating the tree, and not requiring creation of dep tarballs).


-- 
Robin Hugh Johnson
Gentoo Linux: Dev, Infra Lead, Foundation Treasurer
E-Mail   : robbat2@gentoo.org
GnuPG FP : 11ACBA4F 4778E3F6 E4EDF38E B27B944E 34884E85
GnuPG FP : 7D0B3CEB E9B85B1F 825BCECF EE05E6F6 A48F6136

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 1113 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-07-16 18:46             ` Robin H. Johnson
@ 2022-07-16 19:35               ` William Hubbs
  2022-07-16 20:20                 ` Ulrich Mueller
  0 siblings, 1 reply; 58+ messages in thread
From: William Hubbs @ 2022-07-16 19:35 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 1189 bytes --]

On Sat, Jul 16, 2022 at 06:46:40PM +0000, Robin H. Johnson wrote:
> On Sat, Jul 16, 2022 at 09:31:35PM +0300, Arthur Zamarin wrote:
> > I want to give another option. Both ways are allowed by eclass, but by
> > QA policy (or some other decision), it is prohibited to use EGO_SUM in
> > main ::gentoo tree.
> > 
> > As a result, overlays and ::guru can use the EGO_SUM or dist distfile
> > (remember, they don't have access to hosting on dev.g.o).
> Yes; this is the option I was trying to propose as an intermediate step
> until we have indirect Manifests that provide the best of both worlds
> (not bloating the tree, and not requiring creation of dep tarballs).

I could force this in the eclass with the following flow if I know how
to tell if the ebuild inheriting it is in the main tree or not:

# in_main_tree is a placeholder for a test to see if the ebuild running
# this is in the tree
	if [[ -n ${EGO_SUM} && in_main_tree ]]; then
		eqawarn "EGO_SUM is not allowed in the main tree"
		eqawarn "This will become a fatal error in the future"
	fi

	The only question is, is there a way to reliably tell whether or not
	we are in the main tree?

William


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-07-16 19:35               ` William Hubbs
@ 2022-07-16 20:20                 ` Ulrich Mueller
  2022-07-17  1:37                   ` William Hubbs
  0 siblings, 1 reply; 58+ messages in thread
From: Ulrich Mueller @ 2022-07-16 20:20 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 864 bytes --]

>>>>> On Sat, 16 Jul 2022, William Hubbs wrote:

> I could force this in the eclass with the following flow if I know how
> to tell if the ebuild inheriting it is in the main tree or not:

> # in_main_tree is a place holder for a test to see if the ebuld running
> # this is in the tree
> 	if [[ -n ${EGO_SUM} && in_main_tree ]]; then
> 		eqawarn "EGO_SUM is not allowed in the main tree"
> 		eqawarn "This will become a fatal error in the future"
> 	fi

> 	The only question is, is there a way to reliably tell whether or not
> 	we are  in the main tree?

An eclass has no legitimate way to find out in which repository it is.
The rationale is that users should be able to copy ebuilds and eclasses
to their local overlays, and they should work there in the same way.

There is an internal (and undocumented) Portage variable, but that
shouldn't be used.

Ulrich

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 507 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-07-16 20:20                 ` Ulrich Mueller
@ 2022-07-17  1:37                   ` William Hubbs
  0 siblings, 0 replies; 58+ messages in thread
From: William Hubbs @ 2022-07-17  1:37 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 851 bytes --]

On Sat, Jul 16, 2022 at 10:20:01PM +0200, Ulrich Mueller wrote:
> >>>>> On Sat, 16 Jul 2022, William Hubbs wrote:
> > 	The only question is, is there a way to reliably tell whether or not
> > 	we are  in the main tree?
> 
> An eclass has no legitimate way to find out in which repository it is.
> The rationale is that users should be able to copy ebuilds and eclasses
> to their local overlays, and they should work there in the same way.
> 
> There is an internal (and undocumented) Portage variable, but that
> shouldn't be used.

In that case, I'm left with two options.

1) continue with deprecating and removing EGO_SUM.

2) (suggested on IRC) allow EGO_SUM as long as it has fewer than a certain
low number of entries. It would need to be kept small to keep ebuilds
and manifests from bloating too much.

Thoughts?

William


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-13  7:44 [gentoo-dev] Proposal to undeprecate EGO_SUM Florian Schmaus
                   ` (3 preceding siblings ...)
  2022-06-26 23:43 ` Zoltan Puskas
@ 2022-09-28 15:28 ` Florian Schmaus
  2022-09-28 16:31   ` Ulrich Mueller
                     ` (2 more replies)
  4 siblings, 3 replies; 58+ messages in thread
From: Florian Schmaus @ 2022-09-28 15:28 UTC (permalink / raw
  To: gentoo-dev

I would like to continue discussing whether we should entirely deprecate 
EGO_SUM without the desire to offend anyone.

We now have a pending GitHub PR that bumps restic to 0.14 [1]. Restic is 
a very popular backup software written in Go. The PR drops EGO_SUM in 
favor of a vendor tarball created by the proxied maintainer. However, I 
am unaware of any tool that lets you practically audit the 35 MiB source 
contained in the tarball. And even if such a tool exists, this would 
mean another manual step is required, which is, potentially, skipped 
most of the time, weakening our users' security. This is because I 
believe neither our tooling, e.g., go-module.eclass, nor any Golang 
tooling authenticates the contents of the vendor tarball against 
upstream's go.sum. But please correct me if I am wrong.

I wonder if we can reach consensus around un-deprecating EGO_SUM, but 
discouraging its usage in certain situations. That is, provide EGO_SUM 
as an option but disallow its use if
1.) *upstream* provides a vendor tarball
2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer 
maintains the package
3.) the number of EGO_SUM entries exceeds 1500 and a proxied maintainer 
maintains the package

In case of 3, I would encourage proxy maintainers to create and provide 
the vendor tarball.

The suggested EGO_SUM limits result from a histogram that I created 
analyzing ::gentoo at 2022-01-01, i.e., a few months before EGO_SUM was 
deprecated.

- Flow

1: https://github.com/gentoo/gentoo/pull/27050


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-28 15:28 ` Florian Schmaus
@ 2022-09-28 16:31   ` Ulrich Mueller
  2022-09-30  0:36     ` William Hubbs
  2022-09-28 21:23   ` John Helmert III
  2022-09-30 19:02   ` Georgy Yakovlev
  2 siblings, 1 reply; 58+ messages in thread
From: Ulrich Mueller @ 2022-09-28 16:31 UTC (permalink / raw
  To: Florian Schmaus; +Cc: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 1487 bytes --]

>>>>> On Wed, 28 Sep 2022, Florian Schmaus wrote:

> I would like to continue discussing whether we should entirely
> deprecate EGO_SUM without the desire to offend anyone.

> We now have a pending GitHub PR that bumps restic to 0.14 [1]. Restic
> is a very popular backup software written in Go. The PR drops EGO_SUM
> in favor of a vendor tarball created by the proxied maintainer.
> However, I am unaware of any tool that lets you practically audit the
> 35 MiB source contained in the tarball. And even if such a tool
> exists, this would mean another manual step is required, which is,
> potentially, skipped most of the time, weakening our user's security.
> This is because I believe neither our tooling, e.g., go-mod.eclass,
> nor any Golang tooling, does authenticate the contents of the vendor
> tarball against upstream's go.sum. But please correct me if I am
> wrong.

> I wonder if we can reach consensus around un-depreacting EGO_SUM, but
> discouraging its usage in certain situations. That is, provide EGO_SUM
> as option but disallow its use if
> 1.) *upstream* provides a vendor tarball
> 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer
> maintains the package
> 3.) the number of EGO_SUM entries exceeds 1500 and a proxied
> maintainer maintains the package

These numbers seem quite large, compared to the mean number of 3.4
distfiles for packages in the Gentoo repository. (The median and the
99-percentile are 1 and 22, respectively.)

Ulrich

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 507 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-28 15:28 ` Florian Schmaus
  2022-09-28 16:31   ` Ulrich Mueller
@ 2022-09-28 21:23   ` John Helmert III
  2022-09-30 13:57     ` Florian Schmaus
  2022-09-30 19:02   ` Georgy Yakovlev
  2 siblings, 1 reply; 58+ messages in thread
From: John Helmert III @ 2022-09-28 21:23 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 1819 bytes --]

On Wed, Sep 28, 2022 at 05:28:00PM +0200, Florian Schmaus wrote:
> I would like to continue discussing whether we should entirely deprecate 
> EGO_SUM without the desire to offend anyone.
> 
> We now have a pending GitHub PR that bumps restic to 0.14 [1]. Restic is 
> a very popular backup software written in Go. The PR drops EGO_SUM in 
> favor of a vendor tarball created by the proxied maintainer. However, I 
> am unaware of any tool that lets you practically audit the 35 MiB source 
> contained in the tarball. And even if such a tool exists, this would 
> mean another manual step is required, which is, potentially, skipped 
> most of the time, weakening our user's security. This is because I 
> believe neither our tooling, e.g., go-mod.eclass, nor any Golang 
> tooling, does authenticate the contents of the vendor tarball against 
> upstream's go.sum. But please correct me if I am wrong.
> 
> I wonder if we can reach consensus around un-depreacting EGO_SUM, but 
> discouraging its usage in certain situations. That is, provide EGO_SUM 
> as option but disallow its use if
> 1.) *upstream* provides a vendor tarball
> 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer 
> maintains the package
> 3.) the number of EGO_SUM entries exceeds 1500 and a proxied maintainer 
> maintains the package

I'm not sure I agree on these limits, given the authenticity problem
exists regardless of how many dependencies there are.

> In case of 3, I would encourage proxy maintainers to create and provide 
> the vendor tarball.
> 
> The suggested EGO_SUM limits result from a histogram that I created 
> analyzing ::gentoo at 2022-01-01, i.e., a few months before EGO_SUM was 
> deprecated.
> 
> - Flow
> 
> 1: https://github.com/gentoo/gentoo/pull/27050
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-28 16:31   ` Ulrich Mueller
@ 2022-09-30  0:36     ` William Hubbs
  2022-09-30 14:53       ` Florian Schmaus
  2022-09-30 20:07       ` Arsen Arsenović
  0 siblings, 2 replies; 58+ messages in thread
From: William Hubbs @ 2022-09-30  0:36 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 3173 bytes --]

On Wed, Sep 28, 2022 at 06:31:39PM +0200, Ulrich Mueller wrote:
> >>>>> On Wed, 28 Sep 2022, Florian Schmaus wrote:
> 
> > I would like to continue discussing whether we should entirely
> > deprecate EGO_SUM without the desire to offend anyone.

Don't worry, I am not offended. I just haven't found a simple way to do
this. Sure, I will continue the discussion.

> > We now have a pending GitHub PR that bumps restic to 0.14 [1]. Restic
> > is a very popular backup software written in Go. The PR drops EGO_SUM
> > in favor of a vendor tarball created by the proxied maintainer.
> > However, I am unaware of any tool that lets you practically audit the
> > 35 MiB source contained in the tarball. And even if such a tool
> > exists, this would mean another manual step is required, which is,
> > potentially, skipped most of the time, weakening our user's security.
> > This is because I believe neither our tooling, e.g., go-mod.eclass,
> > nor any Golang tooling, does authenticate the contents of the vendor
> > tarball against upstream's go.sum. But please correct me if I am
> > wrong.

I don't know for certain about a vendor tarball, but I do know there are
instances where a vendor tarball wouldn't work.
app-containers/containerd is a good example of this. That is why the
vendor tarball idea was dropped.

Go modules are verified by go tooling. That is why I went with a
dependency tarball.

> > I wonder if we can reach consensus around un-depreacting EGO_SUM, but
> > discouraging its usage in certain situations. That is, provide EGO_SUM
> > as option but disallow its use if
> > 1.) *upstream* provides a vendor tarball

Upstream doesn't need to provide a tarball, just an up-to-date "vendor"
directory at the top level of the project. Two examples that do this are
docker and kubernetes.

If the "vendor" directory is in the project, EGO_SUM should not be used.
This is already documented in the eclass.

> > 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer
> > maintains the package
> > 3.) the number of EGO_SUM entries exceeds 1500 and a proxied
> > maintainer maintains the package
> 
> These numbers seem quite large, compared to the mean number of 3.4
> distfiles for packages in the Gentoo repository. (The median and the
> 99-percentile are 1 and 22, respectively.)

There is no way from within portage to tell whether a proxied maintainer
or a developer maintains the package, and I don't think we should care.
We don't want different qa standards for packages in the tree based on
who maintains them.

I think we should settle on one limit. I could check for that limit inside
the eclass and make the ebuild process die if the limit is not observed.

The concern, as I understand it, is about the sizes of the ebuilds and
manifests for go software. Since the number of distfiles was mentioned,
I will add it here and show it in my example numbers below.

To stay with your example, restic has a 300k manifest, multiple 30k+
ebuilds and 897 distfiles.

I'm thinking the limit would have to be much lower. Say, around 256
entries in EGO_SUM_SRC_URI.

William


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-28 21:23   ` John Helmert III
@ 2022-09-30 13:57     ` Florian Schmaus
  2022-09-30 14:36       ` Jaco Kroon
  0 siblings, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-09-30 13:57 UTC (permalink / raw
  To: gentoo-dev


[-- Attachment #1.1.1: Type: text/plain, Size: 2546 bytes --]

On 28/09/2022 23.23, John Helmert III wrote:
> On Wed, Sep 28, 2022 at 05:28:00PM +0200, Florian Schmaus wrote:
>> I would like to continue discussing whether we should entirely deprecate
>> EGO_SUM without the desire to offend anyone.
>>
>> We now have a pending GitHub PR that bumps restic to 0.14 [1]. Restic is
>> a very popular backup software written in Go. The PR drops EGO_SUM in
>> favor of a vendor tarball created by the proxied maintainer. However, I
>> am unaware of any tool that lets you practically audit the 35 MiB source
>> contained in the tarball. And even if such a tool exists, this would
>> mean another manual step is required, which is, potentially, skipped
>> most of the time, weakening our user's security. This is because I
>> believe neither our tooling, e.g., go-mod.eclass, nor any Golang
>> tooling, does authenticate the contents of the vendor tarball against
>> upstream's go.sum. But please correct me if I am wrong.
>>
>> I wonder if we can reach consensus around un-depreacting EGO_SUM, but
>> discouraging its usage in certain situations. That is, provide EGO_SUM
>> as option but disallow its use if
>> 1.) *upstream* provides a vendor tarball
>> 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer
>> maintains the package
>> 3.) the number of EGO_SUM entries exceeds 1500 and a proxied maintainer
>> maintains the package
> 
> I'm not sure I agree on these limits, given the authenticity problem
> exists regardless of how many dependencies there are.

It's not really about authentication, you always have to trust upstream 
to some degree (unless you audit every line of code). But I believe that 
code distributed via official channels is viewed by more eyes and is 
significantly more secure.

EGO_SUM entries are directly fetched from the official distribution 
channels of Golang. Hence, there is a higher chance that malicious code 
in one of those is detected faster, simply because they are consumed by 
more entities. Compared to the dependency tarball that is just used by 
Gentoo. In contrast to the official sources, "nobody" is looking at the 
code inside the tarball.

For proxied packages, where the dependency tarball is published by the 
proxied maintainer, the tarball also allows another entity to inject 
code into the final result of the package. And compared to a few small 
patches in FILESDIR, such a dependency tarball requires more effort to 
review. This further weakens security in comparison to EGO_SUM.

- Flow

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 21081 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 13:57     ` Florian Schmaus
@ 2022-09-30 14:36       ` Jaco Kroon
  2022-09-30 14:53         ` Florian Schmaus
  0 siblings, 1 reply; 58+ messages in thread
From: Jaco Kroon @ 2022-09-30 14:36 UTC (permalink / raw
  To: gentoo-dev, Florian Schmaus

Hi All,

This doesn't directly affect me. Nor am I familiar with the mechanisms.

Perhaps it's worthwhile to suggest that EGO_SUM itself may be
externalized.  I don't know what goes in here, and this will likely
require help from portage itself, so may not be directly viable.

What if portage had a feature whereby a SRC_URI list could be downloaded
as a SRC_URI itself?  In other words:

SRC_URI_INDIRECT="https://wherever/lists_for_some_go_package.txt"

Where that file itself contains lines for entries that would normally go
into SRC_URI (directly or indirectly via EGO_SUM from what I can
deduce).  Something like:

https://www.upstream.com/downloads/package-version.tar.gz =>
fneh.tar.gz|manifest portion goes here

Where manifest portion would assume DIST and fneh.tar.gz, so would start
with the filesize in bytes, followed by checksum value pairs as per
current Manifest files.

Since users may want to know how big the downloads for a specific ebuild
are, some process to generate these external manifests may be in order,
and to subsequently store the size of these indirect downloads
themselves in the local manifest, so in the local Manifest, something like:

IDIST lists_for_some_go_package.txt direct_size indirect_size CHECKSUM
value CHECKSUM value.

I realise this idea isn't immediately feasible, and perhaps not at all,
presented here since perhaps it could spark an idea for someone else. 
It sounds like this is the problem that the vendor tarball tries to
solve, but that that introduces a trust issue - not sure this exactly
goes away but at a minimum we're now verifying download locations again
(as per EGO_SUM or just SRC_URI in general) rather than code tarballs
containing many many times more code than download locations.

Given:

jkroon@plastiekpoot ~ $ du -sh /var/db/repos/gentoo/
644M    /var/db/repos/gentoo/

I'm not against exploding this by another 200 or even 300 MB personally,
but I do agree that pointless bloat is bad, and ideally we want to
shrink the size requirements of the portage tree rather than enlarge.

Kind Regards,
Jaco

On 2022/09/30 15:57, Florian Schmaus wrote:

> On 28/09/2022 23.23, John Helmert III wrote:
>> On Wed, Sep 28, 2022 at 05:28:00PM +0200, Florian Schmaus wrote:
>>> I would like to continue discussing whether we should entirely
>>> deprecate
>>> EGO_SUM without the desire to offend anyone.
>>>
>>> We now have a pending GitHub PR that bumps restic to 0.14 [1].
>>> Restic is
>>> a very popular backup software written in Go. The PR drops EGO_SUM in
>>> favor of a vendor tarball created by the proxied maintainer. However, I
>>> am unaware of any tool that lets you practically audit the 35 MiB
>>> source
>>> contained in the tarball. And even if such a tool exists, this would
>>> mean another manual step is required, which is, potentially, skipped
>>> most of the time, weakening our user's security. This is because I
>>> believe neither our tooling, e.g., go-mod.eclass, nor any Golang
>>> tooling, does authenticate the contents of the vendor tarball against
>>> upstream's go.sum. But please correct me if I am wrong.
>>>
>>> I wonder if we can reach consensus around un-depreacting EGO_SUM, but
>>> discouraging its usage in certain situations. That is, provide EGO_SUM
>>> as option but disallow its use if
>>> 1.) *upstream* provides a vendor tarball
>>> 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer
>>> maintains the package
>>> 3.) the number of EGO_SUM entries exceeds 1500 and a proxied maintainer
>>> maintains the package
>>
>> I'm not sure I agree on these limits, given the authenticity problem
>> exists regardless of how many dependencies there are.
>
> It's not really about authentication, you always have to trust
> upstream to some degree (unless you audit every line of code). But I
> believe that code distributed via official channels is viewed by more
> eyes and significantly more secure.
>
> EGO_SUM entries are directly fetched from the official distribution
> channels of Golang. Hence, there is a higher chance that malicious
> code in one of those is detected faster, simply because they are
> consumed by more entities. Compared to the dependency tarball that is
> just used by Gentoo. In contrast to the official sources, "nobody" is
> looking at the code inside the tarball.
>
> For proxied packages, where the dependency tarball is published by the
> proxied maintainer, the tarball also allows another entity to inject
> code into the final result of the package. And compared to a few small
> patches in FILESDIR, such a dependency tarball requires more effort to
> review. This further weakens security in comparison to EGO_SUM.
>
> - Flow


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30  0:36     ` William Hubbs
@ 2022-09-30 14:53       ` Florian Schmaus
  2022-09-30 15:48         ` William Hubbs
                           ` (2 more replies)
  2022-09-30 20:07       ` Arsen Arsenović
  1 sibling, 3 replies; 58+ messages in thread
From: Florian Schmaus @ 2022-09-30 14:53 UTC (permalink / raw
  To: gentoo-dev


[-- Attachment #1.1.1: Type: text/plain, Size: 2557 bytes --]

On 30/09/2022 02.36, William Hubbs wrote:
> On Wed, Sep 28, 2022 at 06:31:39PM +0200, Ulrich Mueller wrote:
>>>>>>> On Wed, 28 Sep 2022, Florian Schmaus wrote:
>>> 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer
>>> maintains the package
>>> 3.) the number of EGO_SUM entries exceeds 1500 and a proxied
>>> maintainer maintains the package
>>
>> These numbers seem quite large, compared to the mean number of 3.4
>> distfiles for packages in the Gentoo repository. (The median and the
>> 99-percentile are 1 and 22, respectively.)

The numbers may appear large when compared to the whole tree, but I 
think a fair comparison would be within the related programming language 
ecosystem, e.g., Golang or Rust.

For example, analyzing ::gentoo yields the following histogram for 
2022-01-01:
https://dev.gentoo.org/~flow/ego_sum_entries_histogram-2020-01-01.png


> To stay with your example, restic has a 300k manifest, multiple 30k+
> ebuilds and897 distfiles.
> 
> I'm thinking the limit would have to be much lower. Say, around 256
> entries in EGO_SUM_SRC_URI.

A limit of 256 appears to be too low to be of any use. It is slightly 
above the 50th percentile; half of the packages could not use it.

We have to realize that programming language ecosystems that only build 
static binaries tend to produce software projects that have a large 
number of dependencies. For example, app-misc/broot, a tool written in 
Rust, currently has 310 entries in its Manifest. Why should we treat 
one programming language differently from another? Will we see voices that 
ask for banning Rust packages in ::gentoo in the future? With the rising 
popularity of Golang and Rust, we will (hopefully) only ever see an 
increase of such packages in ::gentoo. And most existing packages in 
this category will at best keep their dependency count constant, but are 
also likely to accumulate further dependencies over time.

And quite frankly, I don't see a problem with "large" Manifests and/or 
ebuilds. Yes, it means our FTPs are hosting many files, in some cases 
even many small files. And yes, it means that in some cases ebuild 
parsing takes a bit longer. But I spoke with a few developers in the 
past few months and was not presented with any real world issues that 
EGO_SUM caused. If someone wants to fill in here, then now is a good 
time to speak up. But my impression is that the arguments against 
EGO_SUM are mostly of cosmetic nature. Again, please correct me if I am 
wrong.

- Flow

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 21081 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 14:36       ` Jaco Kroon
@ 2022-09-30 14:53         ` Florian Schmaus
  2022-09-30 15:10           ` Jaco Kroon
  0 siblings, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-09-30 14:53 UTC (permalink / raw
  To: gentoo-dev


[-- Attachment #1.1.1: Type: text/plain, Size: 1550 bytes --]

On 30/09/2022 16.36, Jaco Kroon wrote:
> Hi All,
> 
> This doesn't directly affect me. Nor am I familiar with the mechanisms.
> 
> Perhaps it's worthwhile to suggest that EGO_SUM itself may be
> externalized.  I don't know what goes in here, and this will likely
> require help from portage itself, so may not be directly viable.
> 
> What if portage had a feature whereby a SRC_URI list could be downloaded
> as a SRC_URI itself?  In other words:
> 
> SRC_URI_INDIRECT="https://wherever/lists_for_some_go_package.txt"

That idea pops up every time this is discussed. I don't see something 
like that being implemented in portage anytime soon (please correct me if 
I am wrong), and it means that the ebuild development workflow requires some 
adjustments to keep it as convenient as it currently is (but nothing 
that couldn't be abstracted away by good tooling, i.e., pkgdev).


> jkroon@plastiekpoot ~ $ du -sh /var/db/repos/gentoo/
> 644M    /var/db/repos/gentoo/
> 
> I'm not against exploding this by another 200 or even 300 MB personally,
> but I do agree that pointless bloat is bad, and ideally we want to
> shrink the size requirements of the portage tree rather than enlarge.

What is the problem if it is 400 MB more? What if we double the size? 
Would something break for you? Does that mean we should not add more 
packages to ::gentoo? Where do you draw the line? Would you rather have 
interested persons contribute to Gentoo or drive them away due to the 
struggle that the EGO_SUM deprecation causes?

- Flow

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 21081 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 14:53         ` Florian Schmaus
@ 2022-09-30 15:10           ` Jaco Kroon
  2022-09-30 15:32             ` Zoltan Puskas
  0 siblings, 1 reply; 58+ messages in thread
From: Jaco Kroon @ 2022-09-30 15:10 UTC (permalink / raw
  To: gentoo-dev, Florian Schmaus

Hi,

On 2022/09/30 16:53, Florian Schmaus wrote:
> jkroon@plastiekpoot ~ $ du -sh /var/db/repos/gentoo/
>> 644M    /var/db/repos/gentoo/
>>
>> I'm not against exploding this by another 200 or even 300 MB personally,
>> but I do agree that pointless bloat is bad, and ideally we want to
>> shrink the size requirements of the portage tree rather than enlarge.
>
> What is the problem if it is 400 MB more? ? What if we double the
> size? Would something break for you? Does that mean we should not add
> more packages to ::gentoo? Where do you draw the line? Would you
> rather have interested persons contribute to Gentoo or drive them away
> due the struggle that the EGO_SUM deprecation causes?
How long is a piece of string?

I agree with you entirely.  But if the tree gets to 10GB?

At some point it may be worthwhile to split the tree similar to what
Debian does (or did, haven't checked in a while) where there is a core,
non-core repo etc ... except I suspect it may be better to split into
classes of packages, eg, x11 (aka desktop) style packages etc, and keep
::gentoo primarily to system stuff (which is also getting harder and
harder to define).  And this also makes it harder for maintainers.  And
this is really already what separate overlays do, except they don't (as
far as I know) have the rigorous QA that ::gentoo has.

But again - at what point do you do this - and this also adds extra
burden on maintainers and developers alike.

And of course I could set a filter to not even --sync say /x11-* at
all.  For example.  Or /dev-go or /dev-php etc ...

So perhaps you're right, this is a moot discussion.  Perhaps we should
just say let's solve the problem when (if?) people complain the tree is
too big.  No, I'm not being sarcastic, just blunt (;

The majority of Gentoo users (in my experience) are probably of the
developer oriented mindset either way, or have very specific itches that
need scratching that's hard to scratch with other distributions.  Let's
face it, Gentoo to begin with should probably not be considered an
"easy" distribution.  But it is a highly flexible, pro-choice, extremely
customizable, rolling release distribution.  Which scratches my itch.

Incidentally, the only categories currently to individually exceed 10MB
are these:

11M    media-libs
11M    net-misc
12M    dev-util
13M    dev-ruby
16M    dev-libs
30M    dev-perl
31M    dev-python

And by far the biggest consumer of space:

124M    metadata

Kind Regards,
Jaco


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 15:10           ` Jaco Kroon
@ 2022-09-30 15:32             ` Zoltan Puskas
  0 siblings, 0 replies; 58+ messages in thread
From: Zoltan Puskas @ 2022-09-30 15:32 UTC (permalink / raw
  To: gentoo-dev

Hi,

When the size of the repo is considered too big maybe we can revisit the option
of having the portage tree distributed as a compressed squashfs image.

    $ du -hs /var/db/repos/gentoo
    536M    .
    $ gensquashfs -k -q -b 1M -D /var/db/repos/gentoo -c zstd -X level=22 /tmp/gentoo-current.zstd.sqfs
    $ du -h /tmp/gentoo-current.zstd.sqfs
    47M     /tmp/gentoo-current.zstd.sqfs

Though that would probably open another can of worms around incremental updates
to the portage tree, or more precisely the lack of it (i.e. increased bandwidth
requirements).

Regardless, as a proxied maintainer I agree with Flow's point of view here (I
think I have expressed these in detail too in the past here) and would prefer
undeprecating EGO_SUM.

Zoltan

On Fri, Sep 30, 2022 at 05:10:10PM +0200, Jaco Kroon wrote:
> Hi,
> 
> On 2022/09/30 16:53, Florian Schmaus wrote:
> > jkroon@plastiekpoot ~ $ du -sh /var/db/repos/gentoo/
> >> 644M    /var/db/repos/gentoo/
> >>
> >> I'm not against exploding this by another 200 or even 300 MB personally,
> >> but I do agree that pointless bloat is bad, and ideally we want to
> >> shrink the size requirements of the portage tree rather than enlarge.
> >
> > What is the problem if it is 400 MB more? ? What if we double the
> > size? Would something break for you? Does that mean we should not add
> > more packages to ::gentoo? Where do you draw the line? Would you
> > rather have interested persons contribute to Gentoo or drive them away
> > due the struggle that the EGO_SUM deprecation causes?
> How long is a piece of string?
> 
> I agree with you entirely.  But if the tree gets to 10GB?
> 
> At some point it may be worthwhile to split the tree similar to what
> Debian does (or did, haven't checked in a while) where there is a core,
> non-core repo etc ... except I suspect it may be better to split into
> classes of packages, eg, x11 (aka desktop) style packages etc, and keep
> ::gentoo primarily to system stuff (which is also getting harder and
> harder to define).  And this also makes it harder for maintainers.  And
> this is really already what separate overlays does except the don't (as
> far as I know) have the rigorous QA that ::gentoo has.
> 
> But again - at what point do you do this - and this also adds extra
> burden on maintainers and developers alike.
> 
> And of course I could set a filter to not even --sync say /x11-* at
> all.  For example.  Or /dev-go or /dev-php etc ...
> 
> So perhaps you're right, this is a moot discussion.  Perhaps we should
> just say let's solve the problem when (if?) people complain the tree is
> too big.  No, I'm not being sarcastic, just blunt (;
> 
> The majority of Gentoo users (in my experience) are probably of the
> developer oriented mindset either way, or have very specific itches that
> need scratching that's hard to scratch with other distributions.  Let's
> face it, Gentoo to begin with should probably not be considered an
> "easy" distribution.  But it is a highly flexible, pro-choice, extremely
> customizable, rolling release distribution.  Which scratches my itch.
> 
> Incidentally, the only categories currently to individually exceed 10MB
> are these:
> 
> 11M    media-libs
> 11M    net-misc
> 12M    dev-util
> 13M    dev-ruby
> 16M    dev-libs
> 30M    dev-perl
> 31M    dev-python
> 
> And by far the biggest consumer of space:
> 
> 124M    metadata
> 
> Kind Regards,
> Jaco
> 


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 14:53       ` Florian Schmaus
@ 2022-09-30 15:48         ` William Hubbs
  2022-09-30 19:18         ` Sam James
  2022-09-30 19:49         ` [gentoo-dev] Proposal to undeprecate EGO_SUM Alec Warner
  2 siblings, 0 replies; 58+ messages in thread
From: William Hubbs @ 2022-09-30 15:48 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 3469 bytes --]

On Fri, Sep 30, 2022 at 04:53:39PM +0200, Florian Schmaus wrote:
> On 30/09/2022 02.36, William Hubbs wrote:
> > On Wed, Sep 28, 2022 at 06:31:39PM +0200, Ulrich Mueller wrote:
> >>>>>>> On Wed, 28 Sep 2022, Florian Schmaus wrote:
> >>> 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer
> >>> maintains the package
> >>> 3.) the number of EGO_SUM entries exceeds 1500 and a proxied
> >>> maintainer maintains the package
> >>
> >> These numbers seem quite large, compared to the mean number of 3.4
> >> distfiles for packages in the Gentoo repository. (The median and the
> >> 99-percentile are 1 and 22, respectively.)
> 
> The numbers may appear large when compared to the whole tree, but I 
> think a fair comparison would be within the related programming language 
> ecosystem, e.g., Golang or Rust.
> 
> For example, analyzing ::gentoo yields the following histogram for 
> 2022-01-01:
> https://dev.gentoo.org/~flow/ego_sum_entries_histogram-2020-01-01.png
> 
> 
> > To stay with your example, restic has a 300k manifest, multiple 30k+
> > ebuilds and 897 distfiles.
> > 
> > I'm thinking the limit would have to be much lower. Say, around 256
> > entries in EGO_SUM_SRC_URI.
> 
> A limit of 256 appears to be too low to be of any use. It is slightly 
> above the 50th percentile, half of the packages could not use it.
> 
> We have to realize that programming language ecosystems that only build 
> static binaries tend to produce software projects that have a large 
> number of dependencies. For example, app-misc/broot, a tool written in 
> Rust, has currently 310 entries in its Manifest. Why should we treat 
> one programming language differently from another? Will we see voices that 
> ask for banning Rust packages in ::gentoo in the future? With the rising 
> popularity of Golang and Rust, we will (hopefully) only ever see an 
> increase of such packages in ::gentoo. And most existing packages in 
> this category will at best keep their dependency count constant, but are 
> also likely to accumulate further dependencies over time.

I tend to agree with you honestly. I worked with Zac to come up with a
different proposal which would allow upstream tooling for all languages
that do this to work, but so far it is meeting resistance [1].
I will go back and add more information to that bug, but it will be later
today before I can do that. I want to develop a poc to answer the
statement that these would be live ebuilds if we allowed that.

> And quite frankly, I don't see a problem with "large" Manifests and/or 
> ebuilds. Yes, it means our FTPs are hosting many files, in some cases 
> even many small files. And yes, it means that in some cases ebuild 
> parsing takes a bit longer. But I spoke with a few developers in the 
> past few months and was not presented with any real world issues that 
> EGO_SUM caused. If someone wants to fill in here, then now is a good 
> time to speak up. But my impression is that the arguments against 
> EGO_SUM are mostly of cosmetic nature. Again, please correct me if I am 
> wrong.

I can't name any specific examples at the moment, but I have gotten some
complaints about how long it takes to download and build go
packages with hundreds of dependencies.

Other than that, I'm not the one who voiced the problem originally, so
we definitely need others to speak up.

William

[1] https://bugs.gentoo.org/833567

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-28 15:28 ` Florian Schmaus
  2022-09-28 16:31   ` Ulrich Mueller
  2022-09-28 21:23   ` John Helmert III
@ 2022-09-30 19:02   ` Georgy Yakovlev
  2 siblings, 0 replies; 58+ messages in thread
From: Georgy Yakovlev @ 2022-09-30 19:02 UTC (permalink / raw
  To: gentoo-dev

On Wed, 2022-09-28 at 17:28 +0200, Florian Schmaus wrote:
> > I would like to continue discussing whether we should entirely >
> > deprecate 
> > EGO_SUM without the desire to offend anyone.
> > 
> > We now have a pending GitHub PR that bumps restic to 0.14 [1].
> > Restic > is 
> > a very popular backup software written in Go. The PR drops EGO_SUM
> > in
> > favor of a vendor tarball created by the proxied maintainer.
> > However, > I 
> > am unaware of any tool that lets you practically audit the 35 MiB >
> > source 
> > contained in the tarball. And even if such a tool exists, this
> > would 
> > mean another manual step is required, which is, potentially,
> > skipped 
> > most of the time, weakening our user's security. This is because I 
> > believe neither our tooling, e.g., go-mod.eclass, nor any Golang 
> > tooling, does authenticate the contents of the vendor tarball
> > against
> > upstream's go.sum. But please correct me if I am wrong.
> > 
> > I wonder if we can reach consensus around un-depreacting EGO_SUM,
> > but
> > discouraging its usage in certain situations. That is, provide >
> > EGO_SUM 
> > as option but disallow its use if
> > 1.) *upstream* provides a vendor tarball
> > 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo
> > developer
> > maintains the package
> > 3.) the number of EGO_SUM entries exceeds 1500 and a proxied >
> > maintainer 
> > maintains the package
> > 
> > In case of 3, I would encourage proxy maintainers to create and >
> > provide 
> > the vendor tarball.
> > 
> > The suggested EGO_SUM limits result from a histogram that I created
> > analyzing ::gentoo at 2022-01-01, i.e., a few months before EGO_SUM
> > > was 
> > deprecated.

I think those numbers are too large but overall I think bringing back
EGO_SUM in limited form is a good move, because it allows packaging go
ebuilds in an easy and audit-able way.
If you have vendor tarball - it's completely opaque before you unpack.
With EGO_SUM you could parse ebuilds using that and scan for vulnerable
go modules. And of course vendored source hosting is a problem.

From rust's team perspective ( we use CRATES, which is EGO_SUM
inspiration, but _much_ more compact one) - I'd say take largest rust
ebuild and allow as much as that or slightly more.
x11-terms/alacritty is one of largest and CRATES number of lines is
about 210 per 1 ebuild.

So I'd say set the maximum EGO_SUM size to 256 for ::gentoo, or maybe 512,
remove the limit for overlays completely, and introduce a hard die() in the
eclass if EGO_SUM is larger than that.
I'm not sure if you can detect the repo name in an eclass, though.
In that case pkgcheck and CI could enforce that as fat warnings or
errors.

256/512 limitation will not impose limit on manifest directly, but if
you have
5 versions of max 256/512 EGO_SUM loc - it'll be more reasonable than
5 versions of max 1500 EGO_SUM loc.

rust/cargo ebuild will still produce more compact Manifest given same
amount of lines though, so it's still not directly comparable.

Currently we have 3 versions of alacritty, which use 407 unique crates
across those 3 versions. The Manifest size is about 120K, which is the 20th
largest in ::gentoo.
It's nothing compared to 2.5MB manifests we used to have in some of the
largest go packages.

> > 
> > - Flow
> > 
> > 1: https://github.com/gentoo/gentoo/pull/27050
> > 




^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 14:53       ` Florian Schmaus
  2022-09-30 15:48         ` William Hubbs
@ 2022-09-30 19:18         ` Sam James
  2022-10-11 10:06           ` [gentoo-dev] RFC: check A's size in go-module.eclass Florian Schmaus
  2022-09-30 19:49         ` [gentoo-dev] Proposal to undeprecate EGO_SUM Alec Warner
  2 siblings, 1 reply; 58+ messages in thread
From: Sam James @ 2022-09-30 19:18 UTC (permalink / raw
  To: gentoo-dev; +Cc: Florian Schmaus

[-- Attachment #1: Type: text/plain, Size: 2795 bytes --]



> On 30 Sep 2022, at 15:53, Florian Schmaus <flow@gentoo.org> wrote:
> 
> On 30/09/2022 02.36, William Hubbs wrote:
>> On Wed, Sep 28, 2022 at 06:31:39PM +0200, Ulrich Mueller wrote:
>>>>>>>> On Wed, 28 Sep 2022, Florian Schmaus wrote:
>>>> 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer
>>>> maintains the package
>>>> 3.) the number of EGO_SUM entries exceeds 1500 and a proxied
>>>> maintainer maintains the package
>>> 
>>> These numbers seem quite large, compared to the mean number of 3.4
>>> distfiles for packages in the Gentoo repository. (The median and the
>>> 99-percentile are 1 and 22, respectively.)
> 
> The numbers may appear large when compared to the whole tree, but I think a fair comparison would be within the related programming language ecosystem, e.g., Golang or Rust.
> 
> For example, analyzing ::gentoo yields the following histogram for 2022-01-01:
> https://dev.gentoo.org/~flow/ego_sum_entries_histogram-2020-01-01.png
> 
> 
>> To stay with your example, restic has a 300k manifest, multiple 30k+
>> ebuilds and897 distfiles.
>> I'm thinking the limit would have to be much lower. Say, around 256
>> entries in EGO_SUM_SRC_URI.
> 
> A limit of 256 appears to be to low to be of any use. It is slightly above the 50th percentile, half of the packages could not use it.
> 
> We have to realize that programming language ecosystems that only build static binaries tend to produce software projects that have a large number of dependencies. For example, app-misc/broot, a tool written in Rust, has currently 310 entries in its Manifest. Why should we threat one programming language different from another? Will be see voices that ask for banning Rust packages in ::gentoo in the future? With the rising popularity of Golang and Rust, we will (hopefully) only ever see an increase of such packages in ::gentoo. And most existing packages in this category will at best keep their dependency count constant, but are also likely to accumulate further dependencies over time.
> 
> And quite frankly, I don't see a problem with "large" Manifests and/or ebuilds. Yes, it means our FTPs are hosting many files, in some cases even many small files. And yes, it means that in some cases ebuild parsing takes a bit longer. But I spoke with a few developers in the past few months and was not presented with any real world issues that EGO_SUM caused. If someone wants to fill in here, then now is a good time to speak up. But my impression is that the arguments against EGO_SUM are mostly of cosmetic nature. Again, please correct me if I am wrong.
> 

I need to re-read the whole set of new messages in this thread, but there's still the issue of xargs/command length limits from huge variable contents.

Best,
sam

[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 358 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 14:53       ` Florian Schmaus
  2022-09-30 15:48         ` William Hubbs
  2022-09-30 19:18         ` Sam James
@ 2022-09-30 19:49         ` Alec Warner
  2022-10-01  0:06           ` William Hubbs
  2022-10-01 13:42           ` Florian Schmaus
  2 siblings, 2 replies; 58+ messages in thread
From: Alec Warner @ 2022-09-30 19:49 UTC (permalink / raw
  To: gentoo-dev

On Fri, Sep 30, 2022 at 7:53 AM Florian Schmaus <flow@gentoo.org> wrote:
>
> On 30/09/2022 02.36, William Hubbs wrote:
> > On Wed, Sep 28, 2022 at 06:31:39PM +0200, Ulrich Mueller wrote:
> >>>>>>> On Wed, 28 Sep 2022, Florian Schmaus wrote:
> >>> 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer
> >>> maintains the package
> >>> 3.) the number of EGO_SUM entries exceeds 1500 and a proxied
> >>> maintainer maintains the package
> >>
> >> These numbers seem quite large, compared to the mean number of 3.4
> >> distfiles for packages in the Gentoo repository. (The median and the
> >> 99-percentile are 1 and 22, respectively.)
>
> The numbers may appear large when compared to the whole tree, but I
> think a fair comparison would be within the related programming language
> ecosystem, e.g., Golang or Rust.
>
> For example, analyzing ::gentoo yields the following histogram for
> 2022-01-01:
> https://dev.gentoo.org/~flow/ego_sum_entries_histogram-2020-01-01.png
>
>
> > To stay with your example, restic has a 300k manifest, multiple 30k+
> > ebuilds and897 distfiles.
> >
> > I'm thinking the limit would have to be much lower. Say, around 256
> > entries in EGO_SUM_SRC_URI.
>
> A limit of 256 appears to be to low to be of any use. It is slightly
> above the 50th percentile, half of the packages could not use it.
>
> We have to realize that programming language ecosystems that only build
> static binaries tend to produce software projects that have a large
> number of dependencies. For example, app-misc/broot, a tool written in
> Rust, has currently 310 entries in its Manifest. Why should we threat
> one programming language different from another? Will be see voices that
> ask for banning Rust packages in ::gentoo in the future? With the rising
> popularity of Golang and Rust, we will (hopefully) only ever see an
> increase of such packages in ::gentoo. And most existing packages in
> this category will at best keep their dependency count constant, but are
> also likely to accumulate further dependencies over time.
>
> And quite frankly, I don't see a problem with "large" Manifests and/or
> ebuilds. Yes, it means our FTPs are hosting many files, in some cases
> even many small files. And yes, it means that in some cases ebuild
> parsing takes a bit longer. But I spoke with a few developers in the
> past few months and was not presented with any real world issues that
> EGO_SUM caused. If someone wants to fill in here, then now is a good
> time to speak up. But my impression is that the arguments against
> EGO_SUM are mostly of cosmetic nature. Again, please correct me if I am
> wrong.

I thought the problem was that EGO_SUM ends up in SRC_URI, which ends
up in A. A ends up in the environment, and then exec() fails with
E2BIG because there is an imposed limit on environment variables (and
also command line argument length.)

Did this get fixed?

https://bugs.gentoo.org/719202

>
> - Flow


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30  0:36     ` William Hubbs
  2022-09-30 14:53       ` Florian Schmaus
@ 2022-09-30 20:07       ` Arsen Arsenović
  2022-09-30 23:49         ` William Hubbs
  1 sibling, 1 reply; 58+ messages in thread
From: Arsen Arsenović @ 2022-09-30 20:07 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 1773 bytes --]

Hey,

On Friday, 30 September 2022 02:36:05 CEST William Hubbs wrote:
> I don't know for certain about a vendor tarball, but I do know there
> are instances where a vendor tarball wouldn't work.
> app-containers/containerd is a good example of this, That is why the
> vendor tarball idea was dropped.
It is indeed not possible to verify vendor tarballs[1].  The proposed 
solution Go people had would also require network access.

> Upstream doesn't need to provide a tarball, just an up-to-date
> "vendor" directory at the top level of the project. Two examples that
> do this are docker and kubernetes.
Upstreams doing this sounds like a mess, because then they'd have to 
maintain multiple source trees in their repositories, if I understand 
what you mean.

An alternative to vendor tarballs is modcache tarballs. These are 
absolutely massive (~20 times larger IIRC), though they are verifiable.

opinion: I see no way around it. Vendor tarballs are the way to go.  For 
trivial cases, this can likely be EGO_SUM, but it scales exceedingly 
poorly, to the point of the trivial case being a very small percentage 
of Go packages.  I proposed authenticated automation on Gentoo 
infrastructure as a solution to this, and implemented (a slow and 
unreliable) proof of concept (posted previously).  The obvious question 
of "how will proxy maintainers deal with this" is also relatively 
simple: giving them authorization for a subset of packages that they'd 
need to work on. This is an obvious increase in the barrier of entry for 
fresh proxy maintainers, but it's still likely less than needing 
maintainers to rework ebuilds to use vendor tarballs on dev.g.o.


[1]: https://github.com/golang/go/issues/27348
-- 
Arsen Arsenović

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 358 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 20:07       ` Arsen Arsenović
@ 2022-09-30 23:49         ` William Hubbs
  0 siblings, 0 replies; 58+ messages in thread
From: William Hubbs @ 2022-09-30 23:49 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 3157 bytes --]

On Fri, Sep 30, 2022 at 10:07:44PM +0200, Arsen Arsenović wrote:
> Hey,
> 
> On Friday, 30 September 2022 02:36:05 CEST William Hubbs wrote:
> > I don't know for certain about a vendor tarball, but I do know there
> > are instances where a vendor tarball wouldn't work.
> > app-containers/containerd is a good example of this, That is why the
> > vendor tarball idea was dropped.
> It is indeed not possible to verify vendor tarballs[1].  The proposed 
> solution Go people had would also require network access.
> 
> > Upstream doesn't need to provide a tarball, just an up-to-date
> > "vendor" directory at the top level of the project. Two examples that
> > do this are docker and kubernetes.
> Upstreams doing this sounds like a mess, because then they'd have to 
> maintain multiple source trees in their repositories, if I understand 
> what you mean.

Well, there isn't a lot of work involved in this for upstream, they just
run:

$ go mod vendor

at the top level of their project and keep that directory in sync in
their vcs. The downside is it can be big and some upstreams do not want
to do it.

> 
> An alternative to vendor tarballs is modcache tarballs. These are 
> absolutely massive (~20 times larger IIRC), though, they are verifiable.

The modcache tarballs are what I'm calling dependency tarballs, and yes
they are bigger than vendor tarballs and verifiable.
Also, the go-module eclass sets the GOMODCACHE environment variable to
point to the directory where the contents of the dependency tarball ends
up which makes it easy for the go tooling to just use the information in
that directory.

If we can get bug https://bugs.gentoo.org/833567 to happen in eapi 9,
that would solve all of this.

The next step after I got that to happen would be to put a shared go
module cache in, for example, "${DISTDIR}/go-mod", so that all go
modules from packages would be downloaded there, and they would be
consumed like all distfiles are.

> opinion: I see no way around it. Vendor tarballs are the way to go.  For 
> trivial cases, this can likely be EGO_SUM, but it scales exceedingly 
> poorly, to the point of the trivial case being a very small percentage 
> of Go packages.  I proposed authenticated automation on Gentoo 
> infrastructure as a solution to this, and implemented (a slow and 
> unreliable) proof of concept (posted previously).  The obvious question 
> of "how will proxy maintainers deal with this" is also relatively 
> simple: giving them authorization for a subset of packages that they'd 
> need to work on. This is an obvious increase in the barrier of entry for 
> fresh proxy maintainers, but it's still likely less than needing 
> maintainers to rework ebuilds to use vendor tarballs on dev.g.o.

Vendor tarballs are not complete.  The best example of this I see in the tree is
app-containers/containerd.  If you try to build that with a vendor tarball
instead of a dependency tarball, the build will break, but it works with
a dependency tarball.

William


> 
> 
> [1]: https://github.com/golang/go/issues/27348
> -- 
> Arsen Arsenović



[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 19:49         ` [gentoo-dev] Proposal to undeprecate EGO_SUM Alec Warner
@ 2022-10-01  0:06           ` William Hubbs
  2022-10-01 13:42           ` Florian Schmaus
  1 sibling, 0 replies; 58+ messages in thread
From: William Hubbs @ 2022-10-01  0:06 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 3296 bytes --]

On Fri, Sep 30, 2022 at 12:49:02PM -0700, Alec Warner wrote:
> On Fri, Sep 30, 2022 at 7:53 AM Florian Schmaus <flow@gentoo.org> wrote:
> >
> > On 30/09/2022 02.36, William Hubbs wrote:
> > > On Wed, Sep 28, 2022 at 06:31:39PM +0200, Ulrich Mueller wrote:
> > >>>>>>> On Wed, 28 Sep 2022, Florian Schmaus wrote:
> > >>> 2.) the number of EGO_SUM entries exceeds 1000 and a Gentoo developer
> > >>> maintains the package
> > >>> 3.) the number of EGO_SUM entries exceeds 1500 and a proxied
> > >>> maintainer maintains the package
> > >>
> > >> These numbers seem quite large, compared to the mean number of 3.4
> > >> distfiles for packages in the Gentoo repository. (The median and the
> > >> 99-percentile are 1 and 22, respectively.)
> >
> > The numbers may appear large when compared to the whole tree, but I
> > think a fair comparison would be within the related programming language
> > ecosystem, e.g., Golang or Rust.
> >
> > For example, analyzing ::gentoo yields the following histogram for
> > 2022-01-01:
> > https://dev.gentoo.org/~flow/ego_sum_entries_histogram-2020-01-01.png
> >
> >
> > > To stay with your example, restic has a 300k manifest, multiple 30k+
> > > ebuilds and897 distfiles.
> > >
> > > I'm thinking the limit would have to be much lower. Say, around 256
> > > entries in EGO_SUM_SRC_URI.
> >
> > A limit of 256 appears to be to low to be of any use. It is slightly
> > above the 50th percentile, half of the packages could not use it.
> >
> > We have to realize that programming language ecosystems that only build
> > static binaries tend to produce software projects that have a large
> > number of dependencies. For example, app-misc/broot, a tool written in
> > Rust, has currently 310 entries in its Manifest. Why should we threat
> > one programming language different from another? Will be see voices that
> > ask for banning Rust packages in ::gentoo in the future? With the rising
> > popularity of Golang and Rust, we will (hopefully) only ever see an
> > increase of such packages in ::gentoo. And most existing packages in
> > this category will at best keep their dependency count constant, but are
> > also likely to accumulate further dependencies over time.
> >
> > And quite frankly, I don't see a problem with "large" Manifests and/or
> > ebuilds. Yes, it means our FTPs are hosting many files, in some cases
> > even many small files. And yes, it means that in some cases ebuild
> > parsing takes a bit longer. But I spoke with a few developers in the
> > past few months and was not presented with any real world issues that
> > EGO_SUM caused. If someone wants to fill in here, then now is a good
> > time to speak up. But my impression is that the arguments against
> > EGO_SUM are mostly of cosmetic nature. Again, please correct me if I am
> > wrong.
> 
> I thought the problem was that EGO_SUM ends up in SRC_URI, which ends
> up in A. A ends up in the environment, and then exec() fails with
> E2BIG because there is an imposed limit on environment variables (and
> also command line argument length.)
> 
> Did this get fixed?
> 
> https://bugs.gentoo.org/719202

You are correct this was part of the issue as well. I don't know what
the status of this bug is.

William

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-09-30 19:49         ` [gentoo-dev] Proposal to undeprecate EGO_SUM Alec Warner
  2022-10-01  0:06           ` William Hubbs
@ 2022-10-01 13:42           ` Florian Schmaus
  2022-10-01 16:36             ` Ulrich Mueller
  1 sibling, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-10-01 13:42 UTC (permalink / raw
  To: gentoo-dev


[-- Attachment #1.1.1: Type: text/plain, Size: 1576 bytes --]

On 30/09/2022 21.49, Alec Warner wrote:
> On Fri, Sep 30, 2022 at 7:53 AM Florian Schmaus <flow@gentoo.org> wrote:
>> And quite frankly, I don't see a problem with "large" Manifests and/or
>> ebuilds. Yes, it means our FTPs are hosting many files, in some cases
>> even many small files. And yes, it means that in some cases ebuild
>> parsing takes a bit longer. But I spoke with a few developers in the
>> past few months and was not presented with any real world issues that
>> EGO_SUM caused. If someone wants to fill in here, then now is a good
>> time to speak up. But my impression is that the arguments against
>> EGO_SUM are mostly of cosmetic nature. Again, please correct me if I am
>> wrong.
> 
> I thought the problem was that EGO_SUM ends up in SRC_URI, which ends
> up in A. A ends up in the environment, and then exec() fails with
> E2BIG because there is an imposed limit on environment variables (and
> also command line argument length.)
> 
> Did this get fixed?
> 
> https://bugs.gentoo.org/719202

Bug #719202 was triggered by dev-texlive/texlive-latexextra-2020. It 
appears that the ebuild had more than 6000 entries in SRC_URI [1], from 
which A is generated. Hence even an EGO_SUM limit of 3000 entries 
should provide enough safety margin to avoid any Golang ebuild running 
into this.

- Flow


1: Estimated via
curl 
https://raw.githubusercontent.com/gentoo-mirror/gentoo/39474128bc64d6d4738c9647dbd3b0d1c1268fc4/metadata/md5-cache/dev-texlive/texlive-latexextra-2020 
| grep SRC_URI | awk -F" " '{print NF-1}'

[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 21081 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-10-01 13:42           ` Florian Schmaus
@ 2022-10-01 16:36             ` Ulrich Mueller
  2022-10-01 17:21               ` Florian Schmaus
  0 siblings, 1 reply; 58+ messages in thread
From: Ulrich Mueller @ 2022-10-01 16:36 UTC (permalink / raw
  To: Florian Schmaus; +Cc: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 720 bytes --]

>>>>> On Sat, 01 Oct 2022, Florian Schmaus wrote:

> Bug #719201 was triggered by dev-texlive/texlive-latexextra-2000. It
> appears that the ebuild had more than 6000 entries in SRC_URI [1],

That includes double counting and must be divided by the number of
developers in TEXLIVE_DEVS. AFAICS that number was two in 2020. So 3000
is more realistic as a number there.

> from which A is generated from. Hence even a EGO_SUM limit of 3000
> entries should provide enough safety margin to avoid any Golang ebuild
> running into this.

See above, with 3000 entries there may be zero safety margin. It also
depends on total filename length, because the limit is the Linux
kernel's MAX_ARG_STRLEN (which is 128 KiB).

Ulrich

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 507 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-10-01 16:36             ` Ulrich Mueller
@ 2022-10-01 17:21               ` Florian Schmaus
  2022-10-01 20:59                 ` William Hubbs
  0 siblings, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-10-01 17:21 UTC (permalink / raw
  To: Ulrich Mueller; +Cc: gentoo-dev

On 01/10/2022 18.36, Ulrich Mueller wrote:
>>>>>> On Sat, 01 Oct 2022, Florian Schmaus wrote:
> 
>> Bug #719201 was triggered by dev-texlive/texlive-latexextra-2000. It
>> appears that the ebuild had more than 6000 entries in SRC_URI [1],
> 
> That includes double counting and must be divided by the number of
> developers in TEXLIVE_DEVS. AFAICS that number was two in 2020. So 3000
> is more realistic as a number there.

That may very well be the case. I'd appreciate it if you would elaborate on 
the double counting. If someone knows a good and easy way to compute A 
for an ebuild, then please let me know. That would help to get more 
meaningful data.


>> from which A is generated from. Hence even a EGO_SUM limit of 3000
>> entries should provide enough safety margin to avoid any Golang ebuild
>> running into this.
> 
> See above, with 3000 entries there may be zero safety margin. It also
> depends on total filename length, because the limit is the Linux
> kernel's MAX_ARG_STRLEN (which is 128 KiB).

Of course, this is a rough estimation assuming that the filename length 
is roughly the same on average. That said, my proposed limit for EGO_SUM 
is 1500, which is still half of 3000 and should still provide enough 
safety margin.

- Flow


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-10-01 17:21               ` Florian Schmaus
@ 2022-10-01 20:59                 ` William Hubbs
  0 siblings, 0 replies; 58+ messages in thread
From: William Hubbs @ 2022-10-01 20:59 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 1569 bytes --]

On Sat, Oct 01, 2022 at 07:21:13PM +0200, Florian Schmaus wrote:
> On 01/10/2022 18.36, Ulrich Mueller wrote:
> >>>>>> On Sat, 01 Oct 2022, Florian Schmaus wrote:
> > 
> >> Bug #719201 was triggered by dev-texlive/texlive-latexextra-2000. It
> >> appears that the ebuild had more than 6000 entries in SRC_URI [1],
> > 
> > That includes double counting and must be divided by the number of
> > developers in TEXLIVE_DEVS. AFAICS that number was two in 2020. So 3000
> > is more realistic as a number there.
> 
> That may be very well the case. I'd appreciate if you would elaborate on 
> the double counting. If someone knows a good and easy way to compute A 
> for an ebuild, then please let me know. That would help to get more 
> meaningful data.
> 
> 
> >> from which A is generated from. Hence even a EGO_SUM limit of 3000
> >> entries should provide enough safety margin to avoid any Golang ebuild
> >> running into this.
> > 
> > See above, with 3000 entries there may be zero safety margin. It also
> > depends on total filename length, because the limit is the Linux
> > kernel's MAX_ARG_STRLEN (which is 128 KiB).
> 
> Of course, this is a rough estimation assuming that the filename length 
> is roughly the same on average. That said, my proposed limit for EGO_SUM 
> is 1500, which is still half of 3000 and should still provide enough 
> safety margin.

Since EGO_SUM_SRC_URI is the variable that gets added to SRC_URI, I
would rather put the limitation there instead of EGO_SUM if we do end up
keeping this.

William


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [gentoo-dev] RFC: check A's size in go-module.eclass
  2022-09-30 19:18         ` Sam James
@ 2022-10-11 10:06           ` Florian Schmaus
  2022-10-11 10:06             ` [gentoo-dev] [PATCH] go-module.eclass: ensure that A is less than 112 KiB Florian Schmaus
  2022-10-11 15:33             ` [gentoo-dev] RFC: check A's size in go-module.eclass Mike Gilbert
  0 siblings, 2 replies; 58+ messages in thread
From: Florian Schmaus @ 2022-10-11 10:06 UTC (permalink / raw
  To: gentoo-dev


This is a first suggestion in an effort to reach a compromise that
allows EGO_SUM to be un-deprecated.

I have decided to check the size of A, instead of counting the entries
in EGO_SUM, because that seemed more sensible given that A's size
caused functional issues in the past (bug #719202 [1]).

1: https://bugs.gentoo.org/719202



^ permalink raw reply	[flat|nested] 58+ messages in thread

* [gentoo-dev] [PATCH] go-module.eclass: ensure that A is less than 112 KiB
  2022-10-11 10:06           ` [gentoo-dev] RFC: check A's size in go-module.eclass Florian Schmaus
@ 2022-10-11 10:06             ` Florian Schmaus
  2022-10-11 15:26               ` Mike Gilbert
  2022-10-11 15:33             ` [gentoo-dev] RFC: check A's size in go-module.eclass Mike Gilbert
  1 sibling, 1 reply; 58+ messages in thread
From: Florian Schmaus @ 2022-10-11 10:06 UTC (permalink / raw
  To: gentoo-dev; +Cc: Florian Schmaus

Packages with a large number of EGO_SUM entries, i.e., many thousands,
cause SRC_URI, and in turn A, to become quite large. Prevent issues that
are caused by large environment variables, e.g., execve() errors (see
bug #719202), by ensuring that A stays below a reasonable size.

Signed-off-by: Florian Schmaus <flow@gentoo.org>
---
 eclass/go-module.eclass | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/eclass/go-module.eclass b/eclass/go-module.eclass
index 8047d498b08d..88414b7e9459 100644
--- a/eclass/go-module.eclass
+++ b/eclass/go-module.eclass
@@ -377,6 +377,15 @@ _go-module_src_unpack_gosum() {
 		die "go-module_set_globals must be called in global scope"
 	fi
 
+	local -i a_size="${#A}"
+	# Environment variables must not exceed MAX_ARG_STRLEN (128 KiB) on
+	# Linux, or otherwise execve() may fail. Ensure that A stays below
+	# this value. See also https://bugs.gentoo.org/719202#c16
+	if [[ ${a_size} -gt 114688 ]]; then
+		# A is larger than 112 KiB.
+		die "Size of A variable (${a_size} bytes) is too large. Please use a dependency tarball instead of EGO_SUM."
+	fi
+
 	local goproxy_dir="${GOPROXY/file:\/\//}"
 	mkdir -p "${goproxy_dir}" || die
 
-- 
2.35.1



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] [PATCH] go-module.eclass: ensure that A is less than 112 KiB
  2022-10-11 10:06             ` [gentoo-dev] [PATCH] go-module.eclass: ensure that A is less than 112 KiB Florian Schmaus
@ 2022-10-11 15:26               ` Mike Gilbert
  2022-10-11 15:58                 ` Florian Schmaus
  0 siblings, 1 reply; 58+ messages in thread
From: Mike Gilbert @ 2022-10-11 15:26 UTC (permalink / raw
  To: gentoo-dev; +Cc: Florian Schmaus

On Tue, Oct 11, 2022 at 6:06 AM Florian Schmaus <flow@gentoo.org> wrote:
>
> Packages with a large number of EGO_SUM entries, i.e., many thousands,
> cause SRC_URI, and in turn A, to become quite large. Prevent issues that
> are caused by large environment variables, e.g., execve() errors (see
> bug #719203), by ensuring that A stays below a reasonable size.

This code will never be reached: if the A environment variable is too
large, portage will fail to execute /bin/bash, and the phase function
will not be executed.

If you want to add an error for this, I think it would require changes
to Portage's python code.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] RFC: check A's size in go-module.eclass
  2022-10-11 10:06           ` [gentoo-dev] RFC: check A's size in go-module.eclass Florian Schmaus
  2022-10-11 10:06             ` [gentoo-dev] [PATCH] go-module.eclass: ensure that A is less than 112 KiB Florian Schmaus
@ 2022-10-11 15:33             ` Mike Gilbert
  1 sibling, 0 replies; 58+ messages in thread
From: Mike Gilbert @ 2022-10-11 15:33 UTC (permalink / raw
  To: gentoo-dev

On Tue, Oct 11, 2022 at 6:06 AM Florian Schmaus <flow@gentoo.org> wrote:
>
>
> This is a first suggestion in an effort to reach a compromise that
> allows EGO_SUM to be un-depracted.
>
> I have decided to check the size of A, instead of counting the entries
> in EGO_SUM, because that seemed more sensible given that as A's size
> caused functional issues in the past (bug #719202 [1]).
>
> 1: https://bugs.gentoo.org/719202

I would suggest we simply add a comment to warn ebuild developers that
some unknown large number of modules may cause build failures. If/when
the ebuild starts to fail, they will know they went over the limit and
will have to start collapsing files into tarballs.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] [PATCH] go-module.eclass: ensure that A is less than 112 KiB
  2022-10-11 15:26               ` Mike Gilbert
@ 2022-10-11 15:58                 ` Florian Schmaus
  0 siblings, 0 replies; 58+ messages in thread
From: Florian Schmaus @ 2022-10-11 15:58 UTC (permalink / raw
  To: Mike Gilbert, gentoo-dev


[-- Attachment #1.1.1: Type: text/plain, Size: 1367 bytes --]

On 11/10/2022 17.26, Mike Gilbert wrote:
> On Tue, Oct 11, 2022 at 6:06 AM Florian Schmaus <flow@gentoo.org> wrote:
>>
>> Packages with a large number of EGO_SUM entries, i.e., many thousands,
>> cause SRC_URI, and in turn A, to become quite large. Prevent issues that
>> are caused by large environment variables, e.g., execve() errors (see
>> bug #719203), by ensuring that A stays below a reasonable size.
> 
> This code will never be reached: if the A environment variable is too
> large, portage will fail to execute /bin/bash, and the phase function
> will not be executed.

I believe the code will never be reached if A is > 128 KiB (aka 
MAX_ARG_STRLEN). If A is (112, 128] KiB, then the code will be reached 
and run into the 'die'. If A is <= 112 KiB, then the code will be 
reached and continue without die'ing.

That said, I could also live with your suggestion to un-deprecate 
EGO_SUM without any size limitation (besides limitations not within 
Gentoo's direct control, like MAX_ARG_STRLEN). In this case, we should 
probably, at some point, improve portage's error reporting, so that if, for 
example, execve() fails, portage tries to determine the culprit, e.g., by 
printing out the environment variables that are too large. That appears to 
be nice to have regardless of the current EGO_SUM 
discussion.

- Flow


[-- Attachment #1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 21081 bytes --]

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [gentoo-dev] Proposal to undeprecate EGO_SUM
  2022-06-17 16:27         ` William Hubbs
@ 2022-10-12 13:01           ` Florian Schmaus
  0 siblings, 0 replies; 58+ messages in thread
From: Florian Schmaus @ 2022-10-12 13:01 UTC (permalink / raw
  To: gentoo-dev

On 17/06/2022 18.27, William Hubbs wrote:
> On Mon, Jun 13, 2022 at 12:26:43PM +0200, Ulrich Mueller wrote:
>>>>>>> On Mon, 13 Jun 2022, Florian Schmaus wrote:
>>
>>>>>> Judging from the gentoo-dev@ mailing list discussion [1] about EGO_SUM,
>>>>>> where some voices where in agreement that EGO_SUM has its raison d'être,
>>>>>> while there where no arguments in favor of eventually removing EGO_SUM,
>>>>>> I hereby propose to undeprecate EGO_SUM.
>>>>>>
>>>>>> 1: https://archives.gentoo.org/gentoo-dev/message/1a64a8e7694c3ee11cd48a58a95f2faa
>>
>>>> Can this be done without requesting changes to package managers?
>>
>>> What is 'this' here?
>>
>> Undeprecating EGO_SUM.
>>
>>> The patchset does not make changes to any package manager, just the
>>> go-module eclass.
>>
>>> Note that this is not about finding about an alternative to dependency
>>> tarballs. It is just about re-allowing EGO_SUM in addition to
>>> dependency tarballs for packaging Go software in Gentoo.
> 
> Like I said on my earlier reply, there have been packages that break
> using EGO_SUM.

Those packages obviously can't use EGO_SUM, but this should *not* mean 
that we generally ban EGO_SUM.


> The most pressing concern about EGO_SUM is that it can make portage
> crash because of the size of SRC_URI, so it definitely should not be
> preferred over dependency tarballs.

I think an approach like my posted patch, which makes go-module.eclass 
invoke 'die' if A exceeds a certain threshold, should make developers in 
most situations aware that it is time to switch their package to use a 
dependency tarball instead of EGO_SUM.

The remaining situations are the ones where a package initially exceeds 
the MAX_ARG_STRLEN limit, and where a certain USE-flag combination 
causes the limit to be exceeded. The former should not be a real issue, as 
such ebuilds should never have been committed, as they could never work. The 
latter can be solved by exhaustive testing of all possible USE flag 
combinations.

- Flow


^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2022-10-12 13:01 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-06-13  7:44 [gentoo-dev] Proposal to undeprecate EGO_SUM Florian Schmaus
2022-06-13  7:44 ` [gentoo-dev] [PATCH] go-module.eclass: " Florian Schmaus
2022-06-13  9:49   ` Andrew Ammerlaan
2022-06-13 10:25     ` Florian Schmaus
2022-06-17 15:53   ` William Hubbs
2022-06-13  8:29 ` [gentoo-dev] Proposal to " Michał Górny
2022-06-13  8:49   ` Ulrich Mueller
2022-06-13  9:34     ` Florian Schmaus
2022-06-13 10:26       ` Ulrich Mueller
2022-06-17 16:27         ` William Hubbs
2022-10-12 13:01           ` Florian Schmaus
2022-06-13  9:30   ` Florian Schmaus
2022-06-13 11:03     ` Michał Górny
2022-06-14  9:37   ` Michał Górny
2022-06-14 10:29     ` Florian Schmaus
2022-06-14 16:33       ` [gentoo-dev] " Holger Hoffstätte
2022-06-14 17:03         ` Florian Schmaus
2022-06-15  5:53           ` Michał Górny
2022-06-17 19:04             ` Michał Górny
2022-06-14 17:34 ` [gentoo-dev] " Arsen Arsenović
2022-06-26 23:43 ` Zoltan Puskas
2022-06-27  6:09   ` Oskari Pirhonen
2022-06-27  7:14     ` Zoltan Puskas
2022-07-15 21:34   ` William Hubbs
2022-07-16 11:24     ` Florian Schmaus
2022-07-16 11:58       ` Joonas Niilola
2022-07-16 17:51         ` William Hubbs
2022-07-16 18:31           ` Arthur Zamarin
2022-07-16 18:46             ` Robin H. Johnson
2022-07-16 19:35               ` William Hubbs
2022-07-16 20:20                 ` Ulrich Mueller
2022-07-17  1:37                   ` William Hubbs
2022-09-28 15:28 ` Florian Schmaus
2022-09-28 16:31   ` Ulrich Mueller
2022-09-30  0:36     ` William Hubbs
2022-09-30 14:53       ` Florian Schmaus
2022-09-30 15:48         ` William Hubbs
2022-09-30 19:18         ` Sam James
2022-10-11 10:06           ` [gentoo-dev] RFC: check A's size in go-module.eclass Florian Schmaus
2022-10-11 10:06             ` [gentoo-dev] [PATCH] go-module.eclass: ensure that A is less than 112 KiB Florian Schmaus
2022-10-11 15:26               ` Mike Gilbert
2022-10-11 15:58                 ` Florian Schmaus
2022-10-11 15:33             ` [gentoo-dev] RFC: check A's size in go-module.eclass Mike Gilbert
2022-09-30 19:49         ` [gentoo-dev] Proposal to undeprecate EGO_SUM Alec Warner
2022-10-01  0:06           ` William Hubbs
2022-10-01 13:42           ` Florian Schmaus
2022-10-01 16:36             ` Ulrich Mueller
2022-10-01 17:21               ` Florian Schmaus
2022-10-01 20:59                 ` William Hubbs
2022-09-30 20:07       ` Arsen Arsenović
2022-09-30 23:49         ` William Hubbs
2022-09-28 21:23   ` John Helmert III
2022-09-30 13:57     ` Florian Schmaus
2022-09-30 14:36       ` Jaco Kroon
2022-09-30 14:53         ` Florian Schmaus
2022-09-30 15:10           ` Jaco Kroon
2022-09-30 15:32             ` Zoltan Puskas
2022-09-30 19:02   ` Georgy Yakovlev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox