From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from lists.gentoo.org ([140.105.134.102] helo=robin.gentoo.org) by nuthatch.gentoo.org with esmtp (Exim 4.50) id 1EVuYc-0002Xd-RW for garchives@archives.gentoo.org; Sat, 29 Oct 2005 17:31:55 +0000 Received: from robin.gentoo.org (localhost [127.0.0.1]) by robin.gentoo.org (8.13.5/8.13.5) with SMTP id j9THVJJ3011905; Sat, 29 Oct 2005 17:31:19 GMT Received: from smtp.gentoo.org (smtp.gentoo.org [134.68.220.30]) by robin.gentoo.org (8.13.5/8.13.5) with ESMTP id j9THVIjD016343 for ; Sat, 29 Oct 2005 17:31:19 GMT Received: from zh034158.ppp.dion.ne.jp ([222.3.34.158] helo=opteron246.suzuki-stubbs.home) by smtp.gentoo.org with esmtpa (Exim 4.43) id 1EVuY1-0006Zr-Uw for gentoo-portage-dev@lists.gentoo.org; Sat, 29 Oct 2005 17:31:18 +0000 Received: by opteron246.suzuki-stubbs.home (Postfix, from userid 1000) id 7F159248ADB; Sun, 30 Oct 2005 02:32:22 +0900 (JST) From: Jason Stubbs To: gentoo-portage-dev@lists.gentoo.org Subject: Re: [gentoo-portage-dev] [0/7] portage_exec cleanups (WAS: [Bug 104705] emerge doesn't print complete error message) Date: Sun, 30 Oct 2005 02:32:22 +0900 User-Agent: KMail/1.8.92 References: <200510231545.16596.jstubbs@gentoo.org> <200510291934.13207.jstubbs@gentoo.org> In-Reply-To: <200510291934.13207.jstubbs@gentoo.org> Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-portage-dev@gentoo.org Reply-to: gentoo-portage-dev@lists.gentoo.org MIME-Version: 1.0 Content-Type: Multipart/Mixed; boundary="Boundary-00=_mI7YD+6Yf4vFVPq" Message-Id: <200510300232.22417.jstubbs@gentoo.org> X-Archives-Salt: ffe586e5-bb68-4f07-a8e2-9b1d1de08c81 X-Archives-Hash: 119f7c277bafa3ba1bfb1508a455adb2 --Boundary-00=_mI7YD+6Yf4vFVPq Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Content-Disposition: inline On Saturday 29 October 2005 19:34, Jason Stubbs wrote: > On Sunday 23 October 2005 15:45, Jason Stubbs wrote: > > Commented on the bug due to reasoning behind this patch. Essentially, > > SIGTERM is sent to "tee", a WNOHANG waitpid() is performed followed by > > SIGKILL if it hasn't exited. So if tee doesn't exit immediately upon > > getting the SIGTERM, its buffers won't get a chance to get to disk due to > > it being killed. This patch simply adds a 1 second window between the > > SIGTERM and the SIGKILL. > > Per discussion on the bug, the problem is actually that tee shouldn't be > sent a SIGTERM or SIGKILL at all. The decided solution was to wait on tee > rather ebuild. I went a bit further than that though and cleaned up most of > spawn(). Here's the full patch after incorporating feedback. It should be fairly easy to follow given the other explanations. The only change beyond what's already been gone over is that spawn() is now removing pids from spawned_pids as it cleans them up. -- Jason Stubbs --Boundary-00=_mI7YD+6Yf4vFVPq Content-Type: text/x-diff; charset="utf-8"; name="exec-cleanup.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="exec-cleanup.patch" Index: portage_util.py =================================================================== --- portage_util.py (revision 2199) +++ portage_util.py (working copy) @@ -455,5 +455,15 @@ return mya +def dump_traceback(msg): + try: + import sys, traceback + writemsg("\n====================================\n") + writemsg("Warning: %s\n" % msg) + for line in traceback.format_list(traceback.extract_stack()[:-1]): + writemsg(line) + writemsg("Please file a bug for %s\n" % sys.argv[0]) + writemsg("====================================\n\n") + except: + pass - Index: portage.py =================================================================== --- portage.py (revision 2199) +++ portage.py (working copy) @@ -89,7 +89,7 @@ import portage_util from portage_util import grab_multiple, grabdict, grabdict_package, grabfile, grabfile_package, \ grabints, map_dictlist_vals, pickle_read, pickle_write, stack_dictlist, stack_dicts, stack_lists, \ - unique_array, varexpand, writedict, writeints, writemsg, getconfig + unique_array, varexpand, writedict, writeints, writemsg, getconfig, dump_traceback import portage_exception import portage_gpg import portage_locks @@ -2352,9 +2352,13 @@ return str(eapi).strip() == str(portage_const.EAPI).strip() -def doebuild(myebuild,mydo,myroot,mysettings,debug=0,listonly=0,fetchonly=0,cleanup=0,dbkey=None,use_cache=1,fetchall=0,tree="porttree"): +def doebuild(myebuild,mydo,myroot,mysettings,debug=0,listonly=0,fetchonly=0,cleanup=0,dbkey=None,use_cache=1,fetchall=0,tree=None): global db, actionmap_deps + if not tree: + dump_traceback("tree not specified to doebuild") + tree = "porttree" + ebuild_path = os.path.abspath(myebuild) pkg_dir = os.path.dirname(ebuild_path) @@ -2759,12 +2763,12 @@ print "!!! mydo=qmerge, but install phase hasn't been ran" sys.exit(1) #qmerge is specifically not supposed to do a runtime dep check - return merge(mysettings["CATEGORY"],mysettings["PF"],mysettings["D"],mysettings["BUILDDIR"]+"/build-info",myroot,mysettings,myebuild=mysettings["EBUILD"]) + return merge(mysettings["CATEGORY"],mysettings["PF"],mysettings["D"],mysettings["BUILDDIR"]+"/build-info",myroot,mysettings,myebuild=mysettings["EBUILD"],mytree=tree) elif mydo=="merge": retval=spawnebuild("install",actionmap,mysettings,debug,alwaysdep=1,logfile=logfile) if retval: return retval - return merge(mysettings["CATEGORY"],mysettings["PF"],mysettings["D"],mysettings["BUILDDIR"]+"/build-info",myroot,mysettings,myebuild=mysettings["EBUILD"]) + return merge(mysettings["CATEGORY"],mysettings["PF"],mysettings["D"],mysettings["BUILDDIR"]+"/build-info",myroot,mysettings,myebuild=mysettings["EBUILD"],mytree=tree) else: print "!!! Unknown mydo:",mydo sys.exit(1) @@ -2937,12 +2941,12 @@ return newmtime -def merge(mycat,mypkg,pkgloc,infloc,myroot,mysettings,myebuild=None): - mylink=dblink(mycat,mypkg,myroot,mysettings) +def merge(mycat,mypkg,pkgloc,infloc,myroot,mysettings,myebuild=None,mytree=None): + mylink=dblink(mycat,mypkg,myroot,mysettings,treetype=mytree) return mylink.merge(pkgloc,infloc,myroot,myebuild) def unmerge(cat,pkg,myroot,mysettings,mytrimworld=1): - mylink=dblink(cat,pkg,myroot,mysettings) + mylink=dblink(cat,pkg,myroot,mysettings,treetype="vartree") if mylink.exists(): mylink.unmerge(trimworld=mytrimworld,cleanup=1) mylink.delete() @@ -5393,7 +5397,7 @@ writemsg("Uncaught handled exception: %(exception)s\n" % {"exception":str(e)}) raise - myret=doebuild(myebuild,"depend","/",self.mysettings,dbkey=mydbkey) + myret=doebuild(myebuild,"depend","/",self.mysettings,dbkey=mydbkey,tree="porttree") if myret: portage_locks.unlockfile(mylock) self.lock_held = 0 @@ -6027,12 +6031,13 @@ class dblink: "this class provides an interface to the standard text package database" - def __init__(self,cat,pkg,myroot,mysettings): + def __init__(self,cat,pkg,myroot,mysettings,treetype=None): "create a dblink object for cat/pkg. This dblink entry may or may not exist" self.cat = cat self.pkg = pkg self.mycpv = self.cat+"/"+self.pkg self.mysplit = pkgsplit(self.mycpv) + self.treetype = treetype self.dbroot = os.path.normpath(myroot+VDB_PATH) self.dbcatdir = self.dbroot+"/"+cat @@ -6213,7 +6218,7 @@ #do prerm script if myebuildpath and os.path.exists(myebuildpath): - a=doebuild(myebuildpath,"prerm",self.myroot,self.settings,cleanup=cleanup,use_cache=0,tree="vartree") + a=doebuild(myebuildpath,"prerm",self.myroot,self.settings,cleanup=cleanup,use_cache=0,tree=self.treetype) # XXX: Decide how to handle failures here. if a != 0: writemsg("!!! FAILED prerm: "+str(a)+"\n") @@ -6419,7 +6424,7 @@ if myebuildpath and os.path.exists(myebuildpath): # XXX: This should be the old config, not the current one. # XXX: Use vardbapi to load up env vars. - a=doebuild(myebuildpath,"postrm",self.myroot,self.settings,use_cache=0,tree="vartree") + a=doebuild(myebuildpath,"postrm",self.myroot,self.settings,use_cache=0,tree=self.treetype) # XXX: Decide how to handle failures here. if a != 0: writemsg("!!! FAILED postrm: "+str(a)+"\n") @@ -6541,9 +6546,9 @@ if myebuild: # if we are merging a new ebuild, use *its* pre/postinst rather than using the one in /var/db/pkg # (if any). - a=doebuild(myebuild,"preinst",root,self.settings,cleanup=cleanup,use_cache=0) + a=doebuild(myebuild,"preinst",root,self.settings,cleanup=cleanup,use_cache=0,tree=self.treetype) else: - a=doebuild(inforoot+"/"+self.pkg+".ebuild","preinst",root,self.settings,cleanup=cleanup,use_cache=0) + a=doebuild(inforoot+"/"+self.pkg+".ebuild","preinst",root,self.settings,cleanup=cleanup,use_cache=0,tree=self.treetype) # XXX: Decide how to handle failures here. if a != 0: @@ -6658,9 +6663,9 @@ if myebuild: # if we are merging a new ebuild, use *its* pre/postinst rather than using the one in /var/db/pkg # (if any). - a=doebuild(myebuild,"postinst",root,self.settings,use_cache=0) + a=doebuild(myebuild,"postinst",root,self.settings,use_cache=0,tree=self.treetype) else: - a=doebuild(inforoot+"/"+self.pkg+".ebuild","postinst",root,self.settings,use_cache=0) + a=doebuild(inforoot+"/"+self.pkg+".ebuild","postinst",root,self.settings,use_cache=0,tree=self.treetype) # XXX: Decide how to handle failures here. if a != 0: @@ -7074,7 +7079,7 @@ # the merge takes care of pre/postinst and old instance # auto-unmerge, virtual/provides updates, etc. mysettings.load_infodir(infloc) - mylink=dblink(mycat,mypkg,myroot,mysettings) + mylink=dblink(mycat,mypkg,myroot,mysettings,treetype="bintree") mylink.merge(pkgloc,infloc,myroot,myebuild,cleanup=1) if not os.path.exists(infloc+"/RDEPEND"): Index: portage_exec.py =================================================================== --- portage_exec.py (revision 2199) +++ portage_exec.py (working copy) @@ -11,11 +11,8 @@ try: import resource - max_fd_limit=resource.getrlimit(RLIMIT_NOFILE) -except SystemExit, e: - raise -except: - # hokay, no resource module. + max_fd_limit=resource.getrlimit(resource.RLIMIT_NOFILE)[1] +except ImportError: max_fd_limit=256 spawned_pids = [] @@ -23,12 +20,9 @@ global spawned_pids while spawned_pids: pid = spawned_pids.pop() - try: - os.kill(pid,SIGKILL) - except SystemExit, e: - raise - except: - pass + if os.waitpid(pid,os.WNOHANG) == (0,0): + os.kill(pid,signal.SIGKILL) + os.waitpid(pid,0) atexit.register(cleanup) from portage_const import BASH_BINARY,SANDBOX_BINARY,SANDBOX_PIDS_FILE @@ -67,6 +61,7 @@ # base spawn function def spawn(mycommand,env={},opt_name=None,fd_pipes=None,returnpid=False,uid=None,gid=None,groups=None,umask=None,logfile=None,path_lookup=True): + global spawned_pids if type(mycommand)==types.StringType: mycommand=mycommand.split() myc = mycommand[0] @@ -76,21 +71,15 @@ myc = find_binary(myc) if myc == None: return None - + + if fd_pipes is None: + fd_pipes = {0:0, 1:1, 2:2} + mypid=[] if logfile: pr,pw=os.pipe() - mypid.extend(spawn(('tee','-i','-a',logfile),returnpid=True,fd_pipes={0:pr,1:1,2:2})) - retval=os.waitpid(mypid[-1],os.WNOHANG)[1] - if retval != 0: - # he's dead jim. - if (retval & 0xff)==0: - return (retval >> 8) # exit code - else: - return ((retval & 0xff) << 8) # signal - if not fd_pipes: - fd_pipes={} - fd_pipes[0] = 0 + mypid.extend(spawn(('tee','-i','-a',logfile),returnpid=True,fd_pipes={0:pr,1:fd_pipes[1],2:fd_pipes[2]})) + os.close(pr) fd_pipes[1]=pw fd_pipes[2]=pw @@ -100,41 +89,16 @@ myargs.extend(mycommand[1:]) mypid.append(os.fork()) if mypid[-1] == 0: - # this may look ugly, but basically it moves file descriptors around to ensure no - # handles that are needed are accidentally closed during the final dup2 calls. - trg_fd=[] - if type(fd_pipes)==types.DictType: - src_fd=[] - k=fd_pipes.keys() - k.sort() - for x in k: - trg_fd.append(x) - src_fd.append(fd_pipes[x]) - for x in range(0,len(trg_fd)): - if trg_fd[x] == src_fd[x]: - continue - if trg_fd[x] in src_fd[x+1:]: - new=os.dup2(trg_fd[x],max(src_fd) + 1) - os.close(trg_fd[x]) - try: - while True: - src_fd[s.index(trg_fd[x])]=new - except SystemExit, e: - raise - except: - pass - for x in range(0,len(trg_fd)): - if trg_fd[x] != src_fd[x]: - os.dup2(src_fd[x], trg_fd[x]) - else: - trg_fd=[0,1,2] - for x in range(0,max_fd_limit): - if x not in trg_fd: - try: + my_pipes = {} + for x in fd_pipes: + my_pipes[x] = os.dup(fd_pipes[x]) + for x in my_pipes: + os.dup2(my_pipes[x], x) + for x in range(max_fd_limit): + if x not in my_pipes: + try: os.close(x) - except SystemExit, e: - raise - except: + except OSError: pass # note this order must be preserved- can't change gid/groups if you change uid first. if gid: @@ -145,50 +109,29 @@ os.setuid(uid) if umask: os.umask(umask) - try: - # XXX: We would do this to stop ebuild.sh from getting any - # XXX: output, and consequently, we'd get to handle the sigINT. - #os.close(sys.stdin.fileno()) - pass - except SystemExit, e: - raise - except: - pass try: - #print "execing", myc, myargs os.execve(myc,myargs,env) - except SystemExit, e: - raise except Exception, e: - raise str(e)+":\n "+myc+" "+string.join(myargs) - # If the execve fails, we need to report it, and exit - # *carefully* --- report error here - os._exit(1) - sys.exit(1) - return # should never get reached + print str(e)+":\n "+myc+" "+string.join(myargs) + os._exit(1) if logfile: - os.close(pr) os.close(pw) if returnpid: - global spawned_pids spawned_pids.append(mypid[-1]) return mypid while len(mypid): - retval=os.waitpid(mypid[-1],0)[1] + pid = mypid.pop(0) + retval=os.waitpid(pid,0)[1] + if pid in spawned_pids: + spawned_pids.remove(pid) if retval != 0: - for x in mypid[0:-1]: - try: - os.kill(x,signal.SIGTERM) - if os.waitpid(x,os.WNOHANG)[1] == 0: - # feisty bugger, still alive. - os.kill(x,signal.SIGKILL) + for x in mypid: + if os.waitpid(x,os.WNOHANG) == (0,0): + os.kill(x,signal.SIGKILL) os.waitpid(x,0) - except OSError, oe: - if oe.errno not in (10,3): - raise oe # at this point we've killed all other kid pids generated via this call. # return now. @@ -197,8 +140,6 @@ return (retval >> 8) # return exit code else: return ((retval & 0xff) << 8) # interrupted by signal - else: - mypid.pop(-1) return 0 def find_binary(myc): @@ -211,5 +152,3 @@ return "%s/%s" % (x,myc) return None - - --Boundary-00=_mI7YD+6Yf4vFVPq-- -- gentoo-portage-dev@gentoo.org mailing list