* [gentoo-portage-dev] [1/3] Cache subsystem rewrite
2005-10-30 17:05 [gentoo-portage-dev] [0/3] Cache subsystem rewrite Brian Harring
@ 2005-10-30 17:14 ` Brian Harring
2005-11-03 15:43 ` Jason Stubbs
2005-10-30 17:27 ` [gentoo-portage-dev] [2/3] " Brian Harring
2005-10-30 17:27 ` [gentoo-portage-dev] [3/3] " Brian Harring
2 siblings, 1 reply; 8+ messages in thread
From: Brian Harring @ 2005-10-30 17:14 UTC (permalink / raw)
To: gentoo-portage-dev
[-- Attachment #1.1: Type: text/plain, Size: 1098 bytes --]
This patch is the actual replacement cache subsystem.
Cache db backend classes are now designed as repository-centric
rather than category-centric. The reasoning is that a cache should be
standalone, that a single object should be able to represent a repo,
and that it allows moving the eclass cache into the cache backend
instead of keeping it separate (and incredibly slow).
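Rough sketch of what that means for a consumer (the location path and
gid kwarg here are illustrative; auxdbkeys/portage_gid are the names
used in the integration patch): portdbapi ends up holding one backend
instance per tree, keyed by that tree's location:

    import cache.flat_hash
    from portage_data import portage_gid

    auxdb = {}
    for tree in ["/usr/portage"] + overlay_list:
        # args: location (depcache root), label (the repo this cache
        # mirrors), and the keys the cache stores
        auxdb[tree] = cache.flat_hash.database("/var/cache/edb/dep",
            tree, auxdbkeys, gid=portage_gid)
    # entries are keyed by the full cpv:
    d = auxdb["/usr/portage"]["sys-apps/portage-2.0.53"]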
anydbm, flat_hash, metadata (rsync's metadata/cache), and flat_list
are all reimplemented.
Also added: a sql_template class, and a sqlite implementation for
exercising the sql_template class.
Right now those are slow; it comes down to the fact that stable
portage can only ask the cache for a single cpv at a time, so it's not
exactly leveraging the power of a select statement. Inclusion of the
sqlite class could be moved to another package, although sql_template
should remain.
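For reference, deriving a backend from sql_template mostly means
filling in constants plus a couple of hooks; a minimal sketch, with
the dbapi2 module name purely hypothetical:

    import sql_template
    import somedb  # hypothetical dbapi2 module

    class database(sql_template.SQLDatabase):
        _BaseError = somedb.Error    # base class of the module's exceptions
        _dbClass = somedb.connect    # default _dbconnect does self._dbClass(**config)
        _supports_replace = False    # True if the RDBMS understands REPLACE syntax

        def _table_exists(self, tbl):
            # query the RDBMS catalog; see the sqlite class's
            # sqlite_master query for an example
            raise NotImplementedError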
Finally, cache/util.py. This module holds code for efficiently
cloning cache X -> cache Y, while providing hooks for user code to
register callbacks (implementing percentage counters, notification
every N merged, etc.).
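Usage ends up looking roughly like this (the src/trg/ec instances are
assumed to already exist; the callback class is illustrative, modeled
on util.py's quiet_mirroring):

    import cache.util

    class chatty_mirroring(cache.util.quiet_mirroring):
        # mirror_cache calls update() each time its merge count
        # reaches call_update_min
        call_update_min = 1000
        def update(self, key, *arg):
            print "merged up to", key

    cache.util.mirror_cache(src.iterkeys(), src, trg,
        eclass_cache=ec, verbose_instance=chatty_mirroring())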
~harring
[-- Attachment #1.2: new-cache-subsystem.patch --]
[-- Type: text/plain, Size: 40467 bytes --]
Index: pym/cache/flat_hash.py
===================================================================
--- pym/cache/flat_hash.py (revision 0)
+++ pym/cache/flat_hash.py (revision 0)
@@ -0,0 +1,129 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: flat_hash.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import fs_template
+import cache_errors
+import os, stat
+from mappings import LazyLoad, ProtectedDict
+from template import reconstruct_eclasses
+class database(fs_template.FsBased):
+
+ autocommits = True
+
+ def __init__(self, *args, **config):
+ super(database,self).__init__(*args, **config)
+ self.location = os.path.join(self.location,
+ self.label.lstrip(os.path.sep).rstrip(os.path.sep))
+
+ if not os.path.exists(self.location):
+ self._ensure_dirs()
+
+ def __getitem__(self, cpv):
+ fp = os.path.join(self.location, cpv)
+ try:
+ def curry(*args):
+ def callit(*args2):
+ return args[0](*args[1:]+args2)
+ return callit
+ return ProtectedDict(LazyLoad(curry(self._pull, fp, cpv), initial_items=[("_mtime_", os.stat(fp).st_mtime)]))
+ except OSError:
+ raise KeyError(cpv)
+
+ def _pull(self, fp, cpv):
+ try:
+ myf = open(fp,"r")
+ except IOError:
+ raise KeyError(cpv)
+ except OSError, e:
+ raise cache_errors.CacheCorruption(cpv, e)
+ try:
+ d = self._parse_data(myf, cpv)
+ except (OSError, ValueError), e:
+ myf.close()
+ raise cache_errors.CacheCorruption(cpv, e)
+ myf.close()
+ return d
+
+
+ def _parse_data(self, data, cpv, mtime=0):
+ d = dict(map(lambda x:x.rstrip().split("=", 1), data))
+ if mtime != 0:
+ d["_mtime_"] = long(mtime)
+ if "_eclasses_" in d:
+ d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"])
+
+ for x in self._known_keys:
+ if x not in d:
+ d[x] = ''
+
+ return d
+
+
+ def _setitem(self, cpv, values):
+ s = cpv.rfind("/")
+ fp = os.path.join(self.location,cpv[:s],".update.%i.%s" % (os.getpid(), cpv[s+1:]))
+ try: myf=open(fp, "w")
+ except IOError, ie:
+ if ie.errno == 2:
+ try:
+ self._ensure_dirs(cpv)
+ myf=open(fp,"w")
+ except (OSError, IOError),e:
+ raise cache_errors.CacheCorruption(cpv, e)
+ else:
+ raise cache_errors.CacheCorruption(cpv, ie)
+ except OSError, e:
+ raise cache_errors.CacheCorruption(cpv, e)
+
+ for k, v in values.items():
+ if k != "_mtime_":
+ myf.write("%s=%s\n" % (k, v))
+
+ myf.close()
+ self._ensure_access(fp, mtime=values["_mtime_"])
+
+ #update written. now we move it.
+
+ new_fp = os.path.join(self.location,cpv)
+ try: os.rename(fp, new_fp)
+ except (OSError, IOError), e:
+ os.remove(fp)
+ raise cache_errors.CacheCorruption(cpv, e)
+
+
+ def _delitem(self, cpv):
+ try:
+ os.remove(os.path.join(self.location,cpv))
+ except OSError, e:
+ if e.errno == 2:
+ raise KeyError(cpv)
+ else:
+ raise cache_errors.CacheCorruption(cpv, e)
+
+
+ def has_key(self, cpv):
+ return os.path.exists(os.path.join(self.location, cpv))
+
+
+ def iterkeys(self):
+ """generator for walking the dir struct"""
+ dirs = [self.location]
+ len_base = len(self.location)
+ while len(dirs):
+ for l in os.listdir(dirs[0]):
+ if l.endswith(".cpickle"):
+ continue
+ p = os.path.join(dirs[0],l)
+ st = os.lstat(p)
+ if stat.S_ISDIR(st.st_mode):
+ dirs.append(p)
+ continue
+ yield p[len_base+1:]
+ dirs.pop(0)
+
Index: pym/cache/sql_template.py
===================================================================
--- pym/cache/sql_template.py (revision 0)
+++ pym/cache/sql_template.py (revision 0)
@@ -0,0 +1,275 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: sql_template.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import template, cache_errors
+from template import reconstruct_eclasses
+
+class SQLDatabase(template.database):
+ """template class for RDBM based caches
+
+ This class is designed such that derivatives don't have to change much code, mostly constant strings.
+ _BaseError must be an exception class that all Exceptions thrown from the derived RDBMS are derived
+ from.
+
+ SCHEMA_INSERT_CPV_INTO_PACKAGE should be modified dependant on the RDBMS, as should SCHEMA_PACKAGE_CREATE-
+ basically you need to deal with creation of a unique pkgid. If the dbapi2 rdbms class has a method of
+ recovering that id, then modify _insert_cpv to remove the extra select.
+
+ Creation of a derived class involves supplying _initdb_con, and table_exists.
+ Additionally, the default schemas may have to be modified.
+ """
+
+ SCHEMA_PACKAGE_NAME = "package_cache"
+ SCHEMA_PACKAGE_CREATE = "CREATE TABLE %s (\
+ pkgid INTEGER PRIMARY KEY, label VARCHAR(255), cpv VARCHAR(255), UNIQUE(label, cpv))" % SCHEMA_PACKAGE_NAME
+ SCHEMA_PACKAGE_DROP = "DROP TABLE %s" % SCHEMA_PACKAGE_NAME
+
+ SCHEMA_VALUES_NAME = "values_cache"
+ SCHEMA_VALUES_CREATE = "CREATE TABLE %s ( pkgid integer references %s (pkgid) on delete cascade, \
+ key varchar(255), value text, UNIQUE(pkgid, key))" % (SCHEMA_VALUES_NAME, SCHEMA_PACKAGE_NAME)
+ SCHEMA_VALUES_DROP = "DROP TABLE %s" % SCHEMA_VALUES_NAME
+ SCHEMA_INSERT_CPV_INTO_PACKAGE = "INSERT INTO %s (label, cpv) VALUES(%%s, %%s)" % SCHEMA_PACKAGE_NAME
+
+ _BaseError = ()
+ _dbClass = None
+
+ autocommits = False
+# cleanse_keys = True
+
+ # boolean indicating if the derived RDBMS class supports replace syntax
+ _supports_replace = False
+
+ def __init__(self, location, label, auxdbkeys, *args, **config):
+ """initialize the instance.
+ derived classes shouldn't need to override this"""
+
+ super(SQLDatabase, self).__init__(location, label, auxdbkeys, *args, **config)
+
+ config.setdefault("host","127.0.0.1")
+ config.setdefault("autocommit", self.autocommits)
+ self._initdb_con(config)
+
+ self.label = self._sfilter(self.label)
+
+
+ def _dbconnect(self, config):
+ """should be overridden if the derived class needs special parameters for initializing
+ the db connection, or cursor"""
+ self.db = self._dbClass(**config)
+ self.con = self.db.cursor()
+
+
+ def _initdb_con(self,config):
+ """ensure needed tables are in place.
+ If the derived class needs a different set of table creation commands, overload the appropriate
+ SCHEMA_ attributes. If it needs additional execution beyond that, override this method"""
+
+ self._dbconnect(config)
+ if not self._table_exists(self.SCHEMA_PACKAGE_NAME):
+ if self.readonly:
+ raise cache_errors.ReadOnlyRestriction("table %s doesn't exist" % \
+ self.SCHEMA_PACKAGE_NAME)
+ try: self.con.execute(self.SCHEMA_PACKAGE_CREATE)
+ except self._BaseError, e:
+ raise cache_errors.InitializationError(self.__class__, e)
+
+ if not self._table_exists(self.SCHEMA_VALUES_NAME):
+ if self.readonly:
+ raise cache_errors.ReadOnlyRestriction("table %s doesn't exist" % \
+ self.SCHEMA_VALUES_NAME)
+ try: self.con.execute(self.SCHEMA_VALUES_CREATE)
+ except self._BaseError, e:
+ raise cache_errors.InitializationError(self.__class__, e)
+
+
+ def _table_exists(self, tbl):
+ """return true if a table exists
+ derived classes must override this"""
+ raise NotImplementedError
+
+
+ def _sfilter(self, s):
+ """meta escaping, returns quoted string for use in sql statements"""
+ return "\"%s\"" % s.replace("\\","\\\\").replace("\"","\\\"")
+
+
+ def _getitem(self, cpv):
+ try: self.con.execute("SELECT key, value FROM %s NATURAL JOIN %s "
+ "WHERE label=%s AND cpv=%s" % (self.SCHEMA_PACKAGE_NAME, self.SCHEMA_VALUES_NAME,
+ self.label, self._sfilter(cpv)))
+ except self._BaseError, e:
+ raise cache_errors.CacheCorruption(self, cpv, e)
+
+ rows = self.con.fetchall()
+
+ if len(rows) == 0:
+ raise KeyError(cpv)
+
+ vals = dict([(k,"") for k in self._known_keys])
+ vals.update(dict(rows))
+ return vals
+
+
+ def _delitem(self, cpv):
+ """delete a cpv cache entry
+ derived RDBM classes for this *must* either support cascaded deletes, or
+ override this method"""
+ try:
+ try:
+ self.con.execute("DELETE FROM %s WHERE label=%s AND cpv=%s" % \
+ (self.SCHEMA_PACKAGE_NAME, self.label, self._sfilter(cpv)))
+ if self.autocommits:
+ self.commit()
+ except self._BaseError, e:
+ raise cache_errors.CacheCorruption(self, cpv, e)
+ if self.con.rowcount <= 0:
+ raise KeyError(cpv)
+ except Exception:
+ if not self.autocommits:
+ self.db.rollback()
+ # yes, this can roll back a lot more than just the delete. deal.
+ raise
+
+ def __del__(self):
+ # just to be safe.
+ if "db" in self.__dict__ and self.db != None:
+ self.commit()
+ self.db.close()
+
+ def _setitem(self, cpv, values):
+
+ try:
+ # insert.
+ try: pkgid = self._insert_cpv(cpv)
+ except self._BaseError, e:
+ raise cache_errors.CacheCorruption(cpv, e)
+
+ # __getitem__ fills out missing values,
+ # so we store only what's handed to us and is a known key
+ db_values = []
+ for key in self._known_keys:
+ if values.has_key(key) and values[key] != '':
+ db_values.append({"key":key, "value":values[key]})
+
+ if len(db_values) > 0:
+ try: self.con.executemany("INSERT INTO %s (pkgid, key, value) VALUES(\"%s\", %%(key)s, %%(value)s)" % \
+ (self.SCHEMA_VALUES_NAME, str(pkgid)), db_values)
+ except self._BaseError, e:
+ raise cache_errors.CacheCorruption(cpv, e)
+ if self.autocommits:
+ self.commit()
+
+ except Exception:
+ if not self.autocommits:
+ try: self.db.rollback()
+ except self._BaseError: pass
+ raise
+
+
+ def _insert_cpv(self, cpv):
+ """uses SCHEMA_INSERT_CPV_INTO_PACKAGE, which must be overloaded if the table definition
+ doesn't support auto-increment columns for pkgid.
+ returns the cpvs new pkgid
+ note this doesn't commit the transaction. The caller is expected to."""
+
+ if self._supports_replace:
+ query_str = self.SCHEMA_INSERT_CPV_INTO_PACKAGE.replace("INSERT","REPLACE",1)
+ else:
+ # just delete it first; done before quoting, since __delitem__ quotes for us.
+ try: del self[cpv]
+ except (cache_errors.CacheCorruption, KeyError): pass
+ query_str = self.SCHEMA_INSERT_CPV_INTO_PACKAGE
+ cpv = self._sfilter(cpv)
+ try:
+ self.con.execute(query_str % (self.label, cpv))
+ except self._BaseError:
+ self.db.rollback()
+ raise
+ self.con.execute("SELECT pkgid FROM %s WHERE label=%s AND cpv=%s" % \
+ (self.SCHEMA_PACKAGE_NAME, self.label, cpv))
+
+ if self.con.rowcount != 1:
+ raise cache_errors.CacheCorruption(cpv, "Tried to insert the cpv, but found "
+ " %i matches upon the following select!" % self.con.rowcount)
+ return self.con.fetchone()[0]
+
+
+ def has_key(self, cpv):
+ if not self.autocommits:
+ try: self.commit()
+ except self._BaseError, e:
+ raise cache_errors.GeneralCacheCorruption(e)
+
+ try: self.con.execute("SELECT cpv FROM %s WHERE label=%s AND cpv=%s" % \
+ (self.SCHEMA_PACKAGE_NAME, self.label, self._sfilter(cpv)))
+ except self._BaseError, e:
+ raise cache_errors.GeneralCacheCorruption(e)
+ return self.con.rowcount > 0
+
+
+ def iterkeys(self):
+ if not self.autocommits:
+ try: self.commit()
+ except self._BaseError, e:
+ raise cache_errors.GeneralCacheCorruption(e)
+
+ try: self.con.execute("SELECT cpv FROM %s WHERE label=%s" %
+ (self.SCHEMA_PACKAGE_NAME, self.label))
+ except self._BaseError, e:
+ raise cache_errors.GeneralCacheCorruption(e)
+# return [ row[0] for row in self.con.fetchall() ]
+ for x in self.con.fetchall():
+ yield x[0]
+
+ def iteritems(self):
+ try: self.con.execute("SELECT cpv, key, value FROM %s NATURAL JOIN %s "
+ "WHERE label=%s" % (self.SCHEMA_PACKAGE_NAME, self.SCHEMA_VALUES_NAME,
+ self.label))
+ except self._BaseError, e:
+ raise cache_errors.GeneralCacheCorruption(e)
+
+ oldcpv = None
+ l = []
+ for x, y, v in self.con.fetchall():
+ if oldcpv != x:
+ if oldcpv != None:
+ d = dict(l)
+ if "_eclasses_" in d:
+ d["_eclasses_"] = reconstruct_eclasses(oldcpv, d["_eclasses_"])
+ yield oldcpv, d
+ l = []
+ oldcpv = x
+ l.append((y,v))
+ if oldcpv != None:
+ d = dict(l)
+ if "_eclasses_" in d:
+ d["_eclasses_"] = reconstruct_eclasses(oldcpv, d["_eclasses_"])
+ yield oldcpv, d
+
+ def commit(self):
+ self.db.commit()
+
+ def get_matches(self,match_dict):
+ query_list = []
+ for k,v in match_dict.items():
+ if k not in self._known_keys:
+ raise cache_errors.InvalidRestriction(k, v, "key isn't known to this cache instance")
+ v = v.replace("%","\\%")
+ v = v.replace(".*","%")
+ query_list.append("(key=%s AND value LIKE %s)" % (self._sfilter(k), self._sfilter(v)))
+
+ if len(query_list):
+ query = " AND "+" AND ".join(query_list)
+ else:
+ query = ''
+
+ print "query = SELECT cpv from package_cache natural join values_cache WHERE label=%s %s" % (self.label, query)
+ try: self.con.execute("SELECT cpv from package_cache natural join values_cache WHERE label=%s %s" % \
+ (self.label, query))
+ except self._BaseError, e:
+ raise cache_errors.GeneralCacheCorruption(e)
+
+ return [ row[0] for row in self.con.fetchall() ]
+
Index: pym/cache/anydbm.py
===================================================================
--- pym/cache/anydbm.py (revision 0)
+++ pym/cache/anydbm.py (revision 0)
@@ -0,0 +1,75 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: anydbm.py 1911 2005-08-25 03:44:21Z ferringb $
+
+anydbm_module = __import__("anydbm")
+try:
+ import cPickle as pickle
+except ImportError:
+ import pickle
+import os
+import fs_template
+import cache_errors
+
+
+class database(fs_template.FsBased):
+
+ autocommits = True
+ cleanse_keys = True
+
+ def __init__(self, *args, **config):
+ super(database,self).__init__(*args, **config)
+
+ default_db = config.get("dbtype","anydbm")
+ if not default_db.startswith("."):
+ default_db = '.' + default_db
+
+ self._db_path = os.path.join(self.location, fs_template.gen_label(self.location, self.label)+default_db)
+ self.__db = None
+ try:
+ self.__db = anydbm_module.open(self._db_path, "w", self._perms)
+
+ except anydbm_module.error:
+ # XXX handle this at some point
+ try:
+ self._ensure_dirs()
+ self._ensure_dirs(self._db_path)
+ self._ensure_access(self._db_path)
+ except (OSError, IOError), e:
+ raise cache_errors.InitializationError(self.__class__, e)
+
+ # try again if failed
+ try:
+ if self.__db == None:
+ self.__db = anydbm_module.open(self._db_path, "c", self._perms)
+ except anydbm_module.error, e:
+ raise cache_errors.InitializationError(self.__class__, e)
+
+ def iteritems(self):
+ return self.__db.iteritems()
+
+ def __getitem__(self, cpv):
+ # we override getitem because it's just a cpickling of the data handed in.
+ return pickle.loads(self.__db[cpv])
+
+
+ def _setitem(self, cpv, values):
+ self.__db[cpv] = pickle.dumps(values,pickle.HIGHEST_PROTOCOL)
+
+ def _delitem(self, cpv):
+ del self.__db[cpv]
+
+
+ def iterkeys(self):
+ return iter(self.__db)
+
+
+ def has_key(self, cpv):
+ return cpv in self.__db
+
+
+ def __del__(self):
+ # "__db" is name-mangled to "_database__db" in __dict__
+ if "_database__db" in self.__dict__ and self.__db != None:
+ self.__db.sync()
+ self.__db.close()
Index: pym/cache/template.py
===================================================================
--- pym/cache/template.py (revision 0)
+++ pym/cache/template.py (revision 0)
@@ -0,0 +1,171 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: template.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import cache_errors
+from mappings import ProtectedDict
+
+class database(object):
+ # this is for metadata/cache transfer.
+ # basically flags that the cache needs to be updated when transferred cache to cache.
+ # leave this.
+
+ complete_eclass_entries = True
+ autocommits = False
+ cleanse_keys = False
+
+ def __init__(self, location, label, auxdbkeys, readonly=False):
+ """ initialize the derived class; specifically, store label/keys"""
+ self._known_keys = auxdbkeys
+ self.location = location
+ self.label = label
+ self.readonly = readonly
+ self.sync_rate = 0
+ self.updates = 0
+
+
+ def __getitem__(self, cpv):
+ """set a cpv to values
+ This shouldn't be overriden in derived classes since it handles the __eclasses__ conversion.
+ that said, if the class handles it, they can override it."""
+ if self.updates > self.sync_rate:
+ self.commit()
+ self.updates = 0
+ d=self._getitem(cpv)
+ if "_eclasses_" in d:
+ d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"])
+ return d
+
+ def _getitem(self, cpv):
+ """get cpv's values.
+ override this in derived classes"""
+ raise NotImplementedError
+
+
+ def __setitem__(self, cpv, values):
+ """set a cpv to values
+ This shouldn't be overridden in derived classes since it handles the readonly checks"""
+ if self.readonly:
+ raise cache_errors.ReadOnlyRestriction()
+ if self.cleanse_keys:
+ d=ProtectedDict(values)
+ for k in d.keys():
+ if d[k] == '':
+ del d[k]
+ if "_eclasses_" in values:
+ d["_eclasses_"] = serialize_eclasses(d["_eclasses_"])
+ elif "_eclasses_" in values:
+ d = ProtectedDict(values)
+ d["_eclasses_"] = serialize_eclasses(d["_eclasses_"])
+ else:
+ d = values
+ self._setitem(cpv, d)
+ if not self.autocommits:
+ self.updates += 1
+ if self.updates > self.sync_rate:
+ self.commit()
+ self.updates = 0
+
+
+ def _setitem(self, name, values):
+ """__setitem__ calls this after readonly checks. override it in derived classes
+ note the _eclasses_ key *must* be handled"""
+ raise NotImplementedError
+
+
+ def __delitem__(self, cpv):
+ """delete a key from the cache.
+ This shouldn't be overridden in derived classes since it handles the readonly checks"""
+ if self.readonly:
+ raise cache_errors.ReadOnlyRestriction()
+ if not self.autocommits:
+ self.updates += 1
+ self._delitem(cpv)
+ if self.updates > self.sync_rate:
+ self.commit()
+ self.updates = 0
+
+
+ def _delitem(self,cpv):
+ """__delitem__ calls this after readonly checks. override it in derived classes"""
+ raise NotImplementedError
+
+
+ def has_key(self, cpv):
+ raise NotImplementedError
+
+
+ def keys(self):
+ return tuple(self.iterkeys())
+
+ def iterkeys(self):
+ raise NotImplementedError
+
+ def iteritems(self):
+ for x in self.iterkeys():
+ yield (x, self[x])
+
+ def items(self):
+ return list(self.iteritems())
+
+ def sync(self, rate=0):
+ self.sync_rate = rate
+ if(rate == 0):
+ self.commit()
+
+ def commit(self):
+ if not self.autocommits:
+ raise NotImplementedError
+
+ def get_matches(self, match_dict):
+ """generic function for walking the entire cache db, matching restrictions to
+ filter what cpv's are returned. Derived classes should override this if they
+ can implement a faster method then pulling each cpv:values, and checking it.
+
+ For example, RDBMS derived classes should push the matching logic down to the
+ actual RDBM."""
+
+ import re
+ restricts = {}
+ for key,match in match_dict.iteritems():
+ # XXX this sucks.
+ try:
+ if isinstance(match, str):
+ restricts[key] = re.compile(match).match
+ else:
+ restricts[key] = re.compile(match[0],match[1]).match
+ except re.error, e:
+ raise cache_errors.InvalidRestriction(key, match, e)
+ if key not in self._known_keys:
+ raise cache_errors.InvalidRestriction(key, match, "Key isn't valid")
+
+ for cpv in self.keys():
+ cont = True
+ vals = self[cpv]
+ for key, match in restricts.iteritems():
+ if not match(vals[key]):
+ cont = False
+ break
+ if cont:
+# yield cpv,vals
+ yield cpv
+
+
+def serialize_eclasses(eclass_dict):
+ """takes a dict, returns a string representing said dict"""
+ return "\t".join(["%s\t%s\t%s" % (k, v[0], str(v[1])) for k,v in eclass_dict.items()])
+
+def reconstruct_eclasses(cpv, eclass_string):
+ """returns a dict when handed a string generated by serialize_eclasses"""
+ eclasses = eclass_string.rstrip().lstrip().split("\t")
+ if eclasses == [""]:
+ # occasionally this occurs in the fs backends. they suck.
+ return {}
+ if len(eclasses) % 3 != 0:
+ raise cache_errors.CacheCorruption(cpv, "_eclasses_ was of invalid len %i" % len(eclasses))
+ d={}
+ for x in range(0, len(eclasses), 3):
+ d[eclasses[x]] = (eclasses[x + 1], long(eclasses[x + 2]))
+ del eclasses
+ return d
Index: pym/cache/util.py
===================================================================
--- pym/cache/util.py (revision 0)
+++ pym/cache/util.py (revision 0)
@@ -0,0 +1,103 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: util.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import cache_errors
+
+def mirror_cache(valid_nodes_iterable, src_cache, trg_cache, eclass_cache=None, verbose_instance=None):
+
+ if not src_cache.complete_eclass_entries and not eclass_cache:
+ raise Exception("eclass_cache required for caches of class %s!" % src_cache.__class__)
+
+ if verbose_instance == None:
+ noise=quiet_mirroring()
+ else:
+ noise=verbose_instance
+
+ # dict.fromkeys returns a new dict; assign it, don't discard it
+ dead_nodes = dict.fromkeys(trg_cache.keys())
+ count=0
+
+ if not trg_cache.autocommits:
+ trg_cache.sync(100)
+
+ for x in valid_nodes_iterable:
+# print "processing x=",x
+ count+=1
+ if dead_nodes.has_key(x):
+ del dead_nodes[x]
+ try: entry = src_cache[x]
+ except KeyError, e:
+ noise.missing_entry(x)
+ del e
+ continue
+ write_it = True
+ try:
+ trg = trg_cache[x]
+ if long(trg["_mtime_"]) == long(entry["_mtime_"]) and eclass_cache.is_eclass_data_valid(trg["_eclasses_"]):
+ write_it = False
+ except (cache_errors.CacheError, KeyError):
+ pass
+
+ if write_it:
+ if entry.get("INHERITED",""):
+ if src_cache.complete_eclass_entries:
+ if not "_eclasses_" in entry:
+ noise.corruption(x,"missing _eclasses_ field")
+ continue
+ if not eclass_cache.is_eclass_data_valid(entry["_eclasses_"]):
+ noise.eclass_stale(x)
+ continue
+ else:
+ entry["_eclasses_"] = eclass_cache.get_eclass_data(entry["INHERITED"].split(), \
+ from_master_only=True)
+ if not entry["_eclasses_"]:
+ noise.eclass_stale(x)
+ continue
+
+ # by this time, if it reaches here, the eclass has been validated, and the entry has
+ # been updated/translated (if needs be, for metadata/cache mainly)
+ try: trg_cache[x] = entry
+ except cache_errors.CacheError, ce:
+ noise.exception(x, ce)
+ del ce
+ continue
+ if count >= noise.call_update_min:
+ noise.update(x)
+ count = 0
+
+ if not trg_cache.autocommits:
+ trg_cache.commit()
+
+ # ok. by this time, the trg_cache is up to date, and we have a dict
+ # with a crapload of cpv's. we now walk the target db, removing stuff if it's in the list.
+ for key in dead_nodes:
+ try: del trg_cache[key]
+ except cache_errors.CacheError, ce:
+ noise.exception(ce)
+ del ce
+ dead_nodes.clear()
+ noise.finish()
+
+
+class quiet_mirroring(object):
+ # call_update_min is used by mirror_cache to determine how often to call in.
+ # quiet defaults to 2^24 -1. Don't call update, 'cept once every 16 million or so :)
+ call_update_min = 0xffffff
+ def update(self,key,*arg): pass
+ def exception(self,key,*arg): pass
+ def eclass_stale(self,*arg): pass
+ def missing_entry(self, key): pass
+ def misc(self,key,*arg): pass
+ def corruption(self, key, s): pass
+ def finish(self, *arg): pass
+
+class non_quiet_mirroring(quiet_mirroring):
+ call_update_min=1
+ def update(self,key,*arg): print "processed",key
+ def exception(self, key, *arg): print "exec",key,arg
+ def missing(self,key): print "key %s is missing", key
+ def corruption(self,key,*arg): print "corrupt %s:" % key,arg
+ def eclass_stale(self,key,*arg):print "stale %s:"%key,arg
+
Index: pym/cache/mappings.py
===================================================================
--- pym/cache/mappings.py (revision 0)
+++ pym/cache/mappings.py (revision 0)
@@ -0,0 +1,104 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: mappings.py 2015 2005-09-20 23:14:26Z ferringb $
+
+import UserDict, copy
+
+class ProtectedDict(UserDict.DictMixin):
+ """
+ given an initial dict, this wraps that dict storing changes in a secondary dict, protecting
+ the underlying dict from changes
+ """
+ __slots__=("orig","new","blacklist")
+
+ def __init__(self, orig):
+ self.orig = orig
+ self.new = {}
+ self.blacklist = {}
+
+
+ def __setitem__(self, key, val):
+ self.new[key] = val
+ if key in self.blacklist:
+ del self.blacklist[key]
+
+
+ def __getitem__(self, key):
+ if key in self.new:
+ return self.new[key]
+ if key in self.blacklist:
+ raise KeyError(key)
+ return self.orig[key]
+
+
+ def __delitem__(self, key):
+ if key in self.new:
+ del self.new[key]
+ elif key in self.orig:
+ if key not in self.blacklist:
+ self.blacklist[key] = True
+ return
+ raise KeyError(key)
+
+
+ def __iter__(self):
+ for k in self.new.iterkeys():
+ yield k
+ for k in self.orig.iterkeys():
+ if k not in self.blacklist and k not in self.new:
+ yield k
+
+
+ def keys(self):
+ return list(self.__iter__())
+
+
+ def has_key(self, key):
+ return key in self.new or (key not in self.blacklist and key in self.orig)
+
+
+class LazyLoad(UserDict.DictMixin):
+ """
+ given initial items and a pull callable, this stores the initial items directly and
+ defers loading the rest until a key miss, at which point pull_items is invoked
+ """
+ __slots__=("pull", "d")
+
+ def __init__(self, pull_items, initial_items=[]):
+ self.d = {}
+ for k, v in initial_items:
+ self.d[k] = v
+ self.pull = pull_items
+
+ def __getitem__(self, key):
+ if key in self.d:
+ return self.d[key]
+ elif self.pull != None:
+ self.d.update(self.pull())
+ self.pull = None
+ return self.d[key]
+
+
+ def __iter__(self):
+ return iter(self.keys())
+
+ def keys(self):
+ if self.pull != None:
+ self.d.update(self.pull())
+ self.pull = None
+ return self.d.keys()
+
+
+ def has_key(self, key):
+ return key in self
+
+
+ def __contains__(self, key):
+ if key in self.d:
+ return True
+ elif self.pull != None:
+ self.d.update(self.pull())
+ self.pull = None
+ return key in self.d
+
Index: pym/cache/cache_errors.py
===================================================================
--- pym/cache/cache_errors.py (revision 0)
+++ pym/cache/cache_errors.py (revision 0)
@@ -0,0 +1,41 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: cache_errors.py 1911 2005-08-25 03:44:21Z ferringb $
+
+class CacheError(Exception): pass
+
+class InitializationError(CacheError):
+ def __init__(self, class_name, error):
+ self.error, self.class_name = error, class_name
+ def __str__(self):
+ return "Creation of instance %s failed due to %s" % \
+ (self.class_name, str(self.error))
+
+
+class CacheCorruption(CacheError):
+ def __init__(self, key, ex):
+ self.key, self.ex = key, ex
+ def __str__(self):
+ return "%s is corrupt: %s" % (self.key, str(self.ex))
+
+
+class GeneralCacheCorruption(CacheError):
+ def __init__(self,ex): self.ex = ex
+ def __str__(self): return "corruption detected: %s" % str(self.ex)
+
+
+class InvalidRestriction(CacheError):
+ def __init__(self, key, restriction, exception=None):
+ if exception == None: exception = ''
+ self.key, self.restriction, self.ex = key, restriction, exception
+ def __str__(self):
+ return "%s:%s is not valid: %s" % \
+ (self.key, self.restriction, str(self.ex))
+
+
+class ReadOnlyRestriction(CacheError):
+ def __init__(self, info=''):
+ self.info = info
+ def __str__(self):
+ return "cache is non-modifiable"+str(self.info)
Index: pym/cache/__init__.py
===================================================================
--- pym/cache/__init__.py (revision 0)
+++ pym/cache/__init__.py (revision 0)
@@ -0,0 +1,5 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: __init__.py 1911 2005-08-25 03:44:21Z ferringb $
+
Index: pym/cache/metadata.py
===================================================================
--- pym/cache/metadata.py (revision 0)
+++ pym/cache/metadata.py (revision 0)
@@ -0,0 +1,86 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: metadata.py 1964 2005-09-03 00:16:16Z ferringb $
+
+import os, stat
+import flat_hash
+import cache_errors
+import eclass_cache
+from template import reconstruct_eclasses, serialize_eclasses
+from mappings import ProtectedDict, LazyLoad
+
+# this is the old cache format, flat_list. count maintained here.
+magic_line_count = 22
+
+# store the current key order *here*.
+class database(flat_hash.database):
+ complete_eclass_entries = False
+ auxdbkey_order=('DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI',
+ 'RESTRICT', 'HOMEPAGE', 'LICENSE', 'DESCRIPTION',
+ 'KEYWORDS', 'INHERITED', 'IUSE', 'CDEPEND',
+ 'PDEPEND', 'PROVIDE', 'EAPI')
+
+ autocommits = True
+
+ def __init__(self, location, *args, **config):
+ loc = location
+ super(database, self).__init__(location, *args, **config)
+ self.location = os.path.join(loc, "metadata","cache")
+ self.ec = eclass_cache.cache(loc)
+
+ def __getitem__(self, cpv):
+ return flat_hash.database.__getitem__(self, cpv)
+
+
+ def _parse_data(self, data, cpv, mtime=0):
+ # easy attempt first.
+ data = list(data)
+ if len(data) != magic_line_count:
+ d = flat_hash.database._parse_data(self, data, cpv, mtime)
+ else:
+ # this one's interesting.
+ d = {}
+
+ for line in data:
+ # yes, meant to iterate over a string.
+ hashed = False
+ for idx, c in enumerate(line):
+ if not c.isalpha():
+ if c == "=" and idx > 0:
+ hashed = True
+ d[line[:idx]] = line[idx + 1:]
+ elif c == "_" or c.isdigit():
+ continue
+ break
+ elif not c.isupper():
+ break
+
+ if not hashed:
+ # non hashed.
+ d.clear()
+ for idx, key in enumerate(self.auxdbkey_order):
+ d[key] = data[idx].strip()
+ break
+
+ if "_eclasses_" not in d:
+ if "INHERITED" in d:
+ d["_eclasses_"] = self.ec.get_eclass_data(d["INHERITED"].split(), from_master_only=True)
+ del d["INHERITED"]
+ else:
+ d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"])
+
+ return d
+
+
+
+ def _setitem(self, cpv, values):
+ values = ProtectedDict(values)
+
+ # hack. proper solution is to make this a __setitem__ override, since template.__setitem__
+ # serializes _eclasses_, then we reconstruct it.
+ if "_eclasses_" in values:
+ values["INHERITED"] = ' '.join(reconstruct_eclasses(cpv, values["_eclasses_"]).keys())
+ del values["_eclasses_"]
+
+ flat_hash.database._setitem(self, cpv, values)
Index: pym/cache/sqlite.py
===================================================================
--- pym/cache/sqlite.py (revision 0)
+++ pym/cache/sqlite.py (revision 0)
@@ -0,0 +1,67 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: sqlite.py 1911 2005-08-25 03:44:21Z ferringb $
+
+sqlite_module =__import__("sqlite")
+import os
+import sql_template, fs_template
+import cache_errors
+
+class database(fs_template.FsBased, sql_template.SQLDatabase):
+
+ SCHEMA_DELETE_NAME = "delete_package_values"
+ SCHEMA_DELETE_TRIGGER = """CREATE TRIGGER %s AFTER DELETE on %s
+ begin
+ DELETE FROM %s WHERE pkgid=old.pkgid;
+ end;""" % (SCHEMA_DELETE_NAME, sql_template.SQLDatabase.SCHEMA_PACKAGE_NAME,
+ sql_template.SQLDatabase.SCHEMA_VALUES_NAME)
+
+ _BaseError = sqlite_module.Error
+ _dbClass = sqlite_module
+ _supports_replace = True
+
+ def _dbconnect(self, config):
+ self._dbpath = os.path.join(self.location, fs_template.gen_label(self.location, self.label)+".sqldb")
+ try:
+ self.db = sqlite_module.connect(self._dbpath, mode=self._perms, autocommit=False)
+ if not self._ensure_access(self._dbpath):
+ raise cache_errors.InitializationError(self.__class__, "can't ensure perms on %s" % self._dbpath)
+ self.con = self.db.cursor()
+ except self._BaseError, e:
+ raise cache_errors.InitializationError(self.__class__, e)
+
+
+ def _initdb_con(self, config):
+ sql_template.SQLDatabase._initdb_con(self, config)
+ try:
+ self.con.execute("SELECT name FROM sqlite_master WHERE type=\"trigger\" AND name=%s" % \
+ self._sfilter(self.SCHEMA_DELETE_NAME))
+ if self.con.rowcount == 0:
+ self.con.execute(self.SCHEMA_DELETE_TRIGGER);
+ self.db.commit()
+ except self._BaseError, e:
+ raise cache_errors.InitializationError(self.__class__, e)
+
+ def _table_exists(self, tbl):
+ """return true/false dependant on a tbl existing"""
+ try: self.con.execute("SELECT name FROM sqlite_master WHERE type=\"table\" AND name=%s" %
+ self._sfilter(tbl))
+ except self._BaseError, e:
+ # XXX crappy.
+ return False
+ return len(self.con.fetchall()) == 1
+
+ # we can do it minus a query via rowid.
+ def _insert_cpv(self, cpv):
+ cpv = self._sfilter(cpv)
+ try: self.con.execute(self.SCHEMA_INSERT_CPV_INTO_PACKAGE.replace("INSERT","REPLACE",1) % \
+ (self.label, cpv))
+ except self._BaseError, e:
+ raise cache_errors.CacheCorruption(cpv, "tried to insert a cpv, but failed: %s" % str(e))
+
+ # sums the delete also
+ if self.con.rowcount <= 0 or self.con.rowcount > 2:
+ raise cache_errors.CacheCorruption(cpv, "tried to insert a cpv, but failed- %i rows modified" % self.rowcount)
+ return self.con.lastrowid
+
Index: pym/cache/flat_list.py
===================================================================
--- pym/cache/flat_list.py (revision 0)
+++ pym/cache/flat_list.py (revision 0)
@@ -0,0 +1,109 @@
+import fs_template
+import cache_errors
+import os, stat
+
+# store the current key order *here*.
+class database(fs_template.FsBased):
+
+ autocommits = True
+
+ # do not screw with this ordering. _eclasses_ needs to be last
+ auxdbkey_order=('DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI',
+ 'RESTRICT', 'HOMEPAGE', 'LICENSE', 'DESCRIPTION',
+ 'KEYWORDS', 'IUSE', 'CDEPEND',
+ 'PDEPEND', 'PROVIDE','_eclasses_')
+
+ def __init__(self, label, auxdbkeys, **config):
+ super(database,self).__init__(label, auxdbkeys, **config)
+ self._base = os.path.join(self._base,
+ self.label.lstrip(os.path.sep).rstrip(os.path.sep))
+
+ if len(self._known_keys) > len(self.auxdbkey_order) + 2:
+ raise Exception("fewer ordered keys than known auxdbkeys")
+ if not os.path.exists(self._base):
+ self._ensure_dirs()
+
+
+ def _getitem(self, cpv):
+ d = {}
+ try:
+ myf = open(os.path.join(self._base, cpv),"r")
+ for k,v in zip(self.auxdbkey_order, myf):
+ d[k] = v.rstrip("\n")
+ except (OSError, IOError),e:
+ if isinstance(e,IOError) and e.errno == 2:
+# print "caught for %s" % cpv, e
+# l=os.listdir(os.path.dirname(os.path.join(self._base,cpv)))
+# l.sort()
+# print l
+ raise KeyError(cpv)
+ raise cache_errors.CacheCorruption(cpv, e)
+
+ try: d["_mtime_"] = os.fstat(myf.fileno()).st_mtime
+ except OSError, e:
+ myf.close()
+ raise cache_errors.CacheCorruption(cpv, e)
+ myf.close()
+ return d
+
+
+ def _setitem(self, cpv, values):
+ s = cpv.rfind("/")
+ fp=os.path.join(self._base,cpv[:s],".update.%i.%s" % (os.getpid(), cpv[s+1:]))
+ try: myf=open(fp, "w")
+ except (OSError, IOError), e:
+ if e.errno == 2:
+ try:
+ self._ensure_dirs(cpv)
+ myf=open(fp,"w")
+ except (OSError, IOError),e:
+ raise cache_errors.CacheCorruption(cpv, e)
+ else:
+ raise cache_errors.CacheCorruption(cpv, e)
+
+
+ for x in self.auxdbkey_order:
+ myf.write(values.get(x,"")+"\n")
+
+ myf.close()
+ self._ensure_access(fp, mtime=values["_mtime_"])
+ #update written. now we move it.
+ new_fp = os.path.join(self._base,cpv)
+ try: os.rename(fp, new_fp)
+ except (OSError, IOError), e:
+ os.remove(fp)
+ raise cache_errors.CacheCorruption(cpv, e)
+
+
+ def _delitem(self, cpv):
+ try:
+ os.remove(os.path.join(self._base,cpv))
+ except OSError, e:
+ if e.errno == 2:
+ raise KeyError(cpv)
+ else:
+ raise cache_errors.CacheCorruption(cpv, e)
+
+
+ def has_key(self, cpv):
+ return os.path.exists(os.path.join(self._base, cpv))
+
+
+ def iterkeys(self):
+ """generator for walking the dir struct"""
+ dirs = [self._base]
+ len_base = len(self._base)
+ while len(dirs):
+ for l in os.listdir(dirs[0]):
+ if l.endswith(".cpickle"):
+ continue
+ p = os.path.join(dirs[0],l)
+ st = os.lstat(p)
+ if stat.S_ISDIR(st.st_mode):
+ dirs.append(p)
+ continue
+ yield p[len_base+1:]
+ dirs.pop(0)
+
+
+ def commit(self): pass
Index: pym/cache/fs_template.py
===================================================================
--- pym/cache/fs_template.py (revision 0)
+++ pym/cache/fs_template.py (revision 0)
@@ -0,0 +1,74 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id: fs_template.py 1911 2005-08-25 03:44:21Z ferringb $
+
+import os
+import template, cache_errors
+from portage_data import portage_gid
+
+class FsBased(template.database):
+ """template wrapping fs needed options, and providing _ensure_access as a way to
+ attempt to ensure files have the specified owners/perms"""
+
+ def __init__(self, *args, **config):
+ """throws InitializationError if needs args aren't specified
+ gid and perms aren't listed do to an oddity python currying mechanism
+ gid=portage_gid
+ perms=0665"""
+
+ for x,y in (("gid",portage_gid),("perms",0664)):
+ if x in config:
+ setattr(self, "_"+x, config[x])
+ del config[x]
+ else:
+ setattr(self, "_"+x, y)
+ super(FsBased, self).__init__(*args, **config)
+
+ if self.label.startswith(os.path.sep):
+ # normpath.
+ self.label = os.path.sep + os.path.normpath(self.label).lstrip(os.path.sep)
+
+
+ def _ensure_access(self, path, mtime=-1):
+ """returns true or false if it's able to ensure that path is properly chmod'd and chowned.
+ if mtime is specified, attempts to ensure that's correct also"""
+ try:
+ os.chown(path, -1, self._gid)
+ os.chmod(path, self._perms)
+ if mtime != -1:
+ mtime=long(mtime)
+ os.utime(path, (mtime, mtime))
+ except (OSError, IOError):
+ return False
+ return True
+
+ def _ensure_dirs(self, path=None):
+ """with path!=None, ensure beyond self.location. otherwise, ensure self.location"""
+ if path:
+ path = os.path.dirname(path)
+ base = self.location
+ else:
+ path = self.location
+ base='/'
+
+ for dir in path.lstrip(os.path.sep).rstrip(os.path.sep).split(os.path.sep):
+ base = os.path.join(base,dir)
+ if not os.path.exists(base):
+ um=os.umask(0)
+ try:
+ os.mkdir(base, self._perms | 0111)
+ os.chown(base, -1, self._gid)
+ finally:
+ os.umask(um)
+
+
+def gen_label(base, label):
+ """if supplied label is a path, generate a unique label based upon label, and supplied base path"""
+ if label.find(os.path.sep) == -1:
+ return label
+ label = label.strip("\"").strip("'")
+ label = os.path.join(*(label.rstrip(os.path.sep).split(os.path.sep)))
+ tail = os.path.split(label)[1]
+ return "%s-%X" % (tail, abs(label.__hash__()))
+
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* [gentoo-portage-dev] [2/3] Cache subsystem rewrite
2005-10-30 17:05 [gentoo-portage-dev] [0/3] Cache subsystem rewrite Brian Harring
2005-10-30 17:14 ` [gentoo-portage-dev] [1/3] " Brian Harring
@ 2005-10-30 17:27 ` Brian Harring
2005-11-03 16:01 ` Jason Stubbs
2005-10-30 17:27 ` [gentoo-portage-dev] [3/3] " Brian Harring
2 siblings, 1 reply; 8+ messages in thread
From: Brian Harring @ 2005-10-30 17:27 UTC (permalink / raw)
To: gentoo-portage-dev
[-- Attachment #1.1: Type: text/plain, Size: 1532 bytes --]
Integration patch.
repoman modifications are strictly due to a portdbapi.aux_get
prototype change: removal of a dead optional arg (strict).
emerge modifications switch it over to cache.util's mirroring
code, rather than its own ad-hoc implementation.
portage.py modifications:
1) Shift eclass_cache out of portage.py and into its own module.
2) Change the eclass_cache class so it no longer uses its own
persistent backend (it doesn't track ebuilds); it's now strictly an
in-memory representation of the eclass stacking across trees.
3) portdbapi.__init__ changes
3a) update to use the new eclass_cache,
3b) removal of portdbapi.auxdb.values dicts; auxdb is now a dict of
location => cache db object
3c) close_caches/flush_caches modification; they're not needed (no
persistent eclass cache).
4) portdbapi.aux_get mods
4a) metacachedir is no longer a valid arg; use cache.util for cloning
4b) do not access the cache backend as if it were a zero-cost dict.
It's not.
Pull the values from the backend once, and hold them in a local var.
This change was proposed prior to the lastX caching duct tape being
shoved into the old backend; correcting aux_get's usage of the backend
eliminates the need for any lastX (additionally it's faster, due to no
function call overhead).
4c) conversion over to catching cache-backend-thrown exceptions,
rather than generic ones
4d) INHERITED -> _eclasses_ translation for interpreting the results
of ebuilds' depend phase; a short round-trip sketch follows.
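To make 4d concrete, a round trip of the serialization the backends
use (the eclass name, path, and mtime are made-up sample values):

    from cache.template import serialize_eclasses, reconstruct_eclasses

    ec = {"eutils": ("/usr/portage/eclass", 1130000000L)}
    s = serialize_eclasses(ec)
    # s == "eutils\t/usr/portage/eclass\t1130000000"
    assert reconstruct_eclasses("sys-apps/foo-1.0", s) == ec
    # aux_get then rebuilds INHERITED for callers via
    # ' '.join(mydata.get("_eclasses_", {}).keys())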
~harring
[-- Attachment #1.2: integration.patch --]
[-- Type: text/plain, Size: 20590 bytes --]
Index: bin/repoman
===================================================================
--- bin/repoman (revision 2121)
+++ bin/repoman (working copy)
@@ -898,7 +898,7 @@
fails["ebuild.namenomatch"].append(x+"/"+y+".ebuild")
continue
try:
- myaux=portage.db["/"]["porttree"].dbapi.aux_get(catdir+"/"+y,allvars,strict=1)
+ myaux=portage.db["/"]["porttree"].dbapi.aux_get(catdir+"/"+y,allvars)
except KeyError:
stats["ebuild.syntax"]=stats["ebuild.syntax"]+1
fails["ebuild.syntax"].append(x+"/"+y+".ebuild")
Index: bin/emerge
===================================================================
--- bin/emerge (revision 2121)
+++ bin/emerge (working copy)
@@ -2664,14 +2664,12 @@
portage.portdb.flush_cache()
- try:
- os.umask(002)
- os.chown(cachedir, os.getuid(), portage.portage_gid)
- os.chmod(cachedir, 02775)
- except SystemExit, e:
- raise # Needed else can't exit
- except:
- pass
+ ec = portage.eclass_cache.cache(portage.portdb.porttree_root)
+ # kinda ugly.
+ # XXX: nuke the filter once the UNUSED_0? keys are dead
+ cm = portage.settings.load_best_module("portdbapi.metadbmodule")(myportdir, "metadata/cache",
+ filter(lambda x: not x.startswith("UNUSED_0"), portage.auxdbkeys))
+
# we don't make overlay trees cache here, plus we don't trust portage.settings.categories
porttree_root = portage.portdb.porttree_root
conf = portage.config(config_profile_path=portage.settings.profile_path[:], \
@@ -2687,75 +2685,54 @@
pass
pdb = portage.portdbapi(porttree_root, conf)
- cp_list = pdb.cp_all()
- if len(cp_list) == 0:
- print "no metadata to transfer, exiting"
- sys.exit(0)
- cp_list.sort()
- pcnt=0
- pcntstr=""
- pcntcount=len(cp_list)/100.0
- nextupdate=pcntcount
- current=1
- def cleanse_cache(pdb, cat, saves, porttree_root=porttree_root):
- try:
- if len(saves):
- d={}
- for v in saves:
- d[portage.catsplit(v)[1]] = True
- for pv in pdb.auxdb[porttree_root][cat].keys():
- if pv not in d:
- pdb.auxdb[porttree_root][cat].del_key(pv)
- else:
- try:
- pdb.auxdb[porttree_root][cat].clear()
- del pdb.auxdb[porttree_root][cat]
- except KeyError:
- pass
- except KeyError:
- # stop breaking things, cleansing is minor.
- pass
+ cp_all_list = pdb.cp_all()
+ import cache.util
- savelist = []
- catlist = []
- oldcat = portage.catsplit(cp_list[0])[0]
- for cp in cp_list:
- current += 1
- if current >= nextupdate:
- pcnt += 1
- nextupdate += pcntcount
- if "--quiet" not in myopts:
- pcntstr = str(pcnt)
- sys.stdout.write("\b"*(len(pcntstr)+1)+pcntstr+"%")
- sys.stdout.flush()
- cat = portage.catsplit(cp)[0]
- if cat != oldcat:
- catlist.append(oldcat)
- cleanse_cache(pdb, oldcat, savelist)
- savelist = []
- oldcat = cat
- mymatches = pdb.xmatch("match-all", cp)
- savelist.extend(mymatches)
- for cpv in mymatches:
- try: pdb.aux_get(cpv, ["IUSE"],metacachedir=myportdir+"/metadata/cache",debug=("cachedebug" in portage.features))
- except SystemExit: raise
- except Exception, e: print "\nFailed cache update:",cpv,e
- catlist.append(oldcat)
- catlist.append("local")
- cleanse_cache(pdb, oldcat, savelist)
- filelist = portage.listdir(cachedir+"/"+myportdir)
- for x in filelist:
- found = False
- for y in catlist:
- if x.startswith(y):
- found = True
- break
- if not found:
- portage.spawn("cd /; rm -Rf "+cachedir+"/"+myportdir+"/"+x,portage.settings,free=1,droppriv=1)
+ class percentage_noise_maker(cache.util.quiet_mirroring):
+ def __init__(self, dbapi):
+ self.dbapi = dbapi
+ self.cp_all = dbapi.cp_all()
+ self.cp_all.sort()
+ l = len(self.cp_all)
+ self.call_update_min = 100000000
+ self.min_cp_all = l/100.0
+ self.count = 1
+ self.pstr = ''
+
+ def __iter__(self):
+ for x in self.cp_all:
+ self.count += 1
+ if self.count > self.min_cp_all:
+ self.call_update_min = 0
+ self.count = 0
+ for y in self.dbapi.cp_list(x):
+ yield y
+ self.call_update_min = 0
+
+ def update(self, *arg):
+ try: self.pstr = int(self.pstr) + 1
+ except ValueError: self.pstr = 1
+ sys.stdout.write("%s%i%%" % ("\b" * (len(str(self.pstr))+1), self.pstr))
+ sys.stdout.flush()
+ self.call_update_min = 10000000
+
+ def finish(self, *arg):
+ sys.stdout.write("\b\b\b\b100%\n")
+ sys.stdout.flush()
- sys.stdout.write("\n\n")
+ if "--quiet" in myopts:
+ def quicky_cpv_generator(cp_all_list):
+ for x in cp_all_list:
+ for y in pdb.cp_list(x):
+ yield y
+ source = quicky_cpv_generator(pdb.cp_all())
+ noise_maker = cache.util.quiet_mirroring()
+ else:
+ noise_maker = source = percentage_noise_maker(pdb)
+ cache.util.mirror_cache(source, cm, pdb.auxdb[porttree_root], eclass_cache=ec, verbose_instance=noise_maker)
+
sys.stdout.flush()
portage.portageexit()
@@ -2789,7 +2766,7 @@
print "processing",x
for y in mymatches:
try:
- foo=portage.portdb.aux_get(y,["DEPEND"],debug=1)
+ foo=portage.portdb.aux_get(y,["DEPEND"])
except SystemExit, e:
# sys.exit is an exception... And consequently, we can't catch it.
raise
Index: pym/portage.py
===================================================================
--- pym/portage.py (revision 2121)
+++ pym/portage.py (working copy)
@@ -28,6 +28,7 @@
import commands
from time import sleep
from random import shuffle
+ from cache.cache_errors import CacheError
except SystemExit, e:
raise
except Exception, e:
@@ -97,6 +98,7 @@
from portage_locks import unlockfile,unlockdir,lockfile,lockdir
import portage_checksum
from portage_checksum import perform_md5,perform_checksum,prelink_capable
+ import eclass_cache
from portage_localization import _
except SystemExit, e:
raise
@@ -921,9 +923,8 @@
if self.modules["user"] == None:
self.modules["user"] = {}
self.modules["default"] = {
- "portdbapi.metadbmodule": "portage_db_metadata.database",
- "portdbapi.auxdbmodule": "portage_db_flat.database",
- "eclass_cache.dbmodule": "portage_db_cpickle.database",
+ "portdbapi.metadbmodule": "cache.metadata.database",
+ "portdbapi.auxdbmodule": "cache.flat_hash.database",
}
self.usemask=[]
@@ -5013,116 +5014,6 @@
def populate(self):
self.populated=1
-# ----------------------------------------------------------------------------
-class eclass_cache:
- """Maintains the cache information about eclasses used in ebuild."""
- def __init__(self,porttree_root,settings):
- self.porttree_root = porttree_root
- self.settings = settings
- self.depcachedir = self.settings.depcachedir[:]
-
- self.dbmodule = self.settings.load_best_module("eclass_cache.dbmodule")
-
- self.packages = {} # {"PV": {"eclass1": ["location", "_mtime_"]}}
- self.eclasses = {} # {"Name": ["location","_mtime_"]}
-
- # don't fool with porttree ordering unless you *ensure* that ebuild.sh's inherit
- # ordering is *exactly* the same
- self.porttrees=[self.porttree_root]
- self.porttrees.extend(self.settings["PORTDIR_OVERLAY"].split())
- #normalize the path now, so it's not required later.
- self.porttrees = [os.path.normpath(x) for x in self.porttrees]
- self.update_eclasses()
-
- def close_caches(self):
- for x in self.packages.keys():
- for y in self.packages[x].keys():
- try:
- self.packages[x][y].sync()
- self.packages[x][y].close()
- except SystemExit, e:
- raise
- except Exception,e:
- writemsg("Exception when closing DB: %s: %s\n" % (Exception,e))
- del self.packages[x][y]
- del self.packages[x]
-
- def flush_cache(self):
- self.packages = {}
- self.eclasses = {}
- self.update_eclasses()
-
- def update_eclasses(self):
- self.eclasses = {}
- for x in suffix_array(self.porttrees, "/eclass"):
- if x and os.path.exists(x):
- dirlist = listdir(x)
- for y in dirlist:
- if y[-len(".eclass"):]==".eclass":
- try:
- ys=y[:-len(".eclass")]
- ymtime=os.stat(x+"/"+y)[stat.ST_MTIME]
- except SystemExit, e:
- raise
- except:
- continue
- self.eclasses[ys] = [x, ymtime]
-
- def setup_package(self, location, cat, pkg):
- if not self.packages.has_key(location):
- self.packages[location] = {}
-
- if not self.packages[location].has_key(cat):
- try:
- self.packages[location][cat] = self.dbmodule(self.depcachedir+"/"+location, cat+"-eclass", [], uid, portage_gid)
- except SystemExit, e:
- raise
- except Exception, e:
- writemsg("\n!!! Failed to open the dbmodule for eclass caching.\n")
- writemsg("!!! Generally these are permission problems. Caught exception follows:\n")
- writemsg("!!! "+str(e)+"\n")
- writemsg("!!! Dirname: "+str(self.depcachedir+"/"+location)+"\n")
- writemsg("!!! Basename: "+str(cat+"-eclass")+"\n\n")
- sys.exit(123)
-
- def sync(self, location, cat, pkg):
- if self.packages[location].has_key(cat):
- self.packages[location][cat].sync()
-
- def update_package(self, location, cat, pkg, eclass_list):
- self.setup_package(location, cat, pkg)
- if not eclass_list:
- return 1
-
- data = {}
- for x in eclass_list:
- if x not in self.eclasses:
- writemsg("Eclass '%s' does not exist for '%s'\n" % (x, cat+"/"+pkg))
- return 0
- data[x] = [self.eclasses[x][0],self.eclasses[x][1]]
-
- self.packages[location][cat][pkg] = data
- self.sync(location,cat,pkg)
- return 1
-
- def is_current(self, location, cat, pkg, eclass_list):
- self.setup_package(location, cat, pkg)
-
- if not eclass_list:
- return 1
-
- if not (self.packages[location][cat].has_key(pkg) and self.packages[location][cat][pkg] and eclass_list):
- return 0
-
- myp = self.packages[location][cat][pkg]
- for x in eclass_list:
- if not (x in self.eclasses and x in myp and myp[x] == self.eclasses[x]):
- return 0
-
- return 1
-
-# ----------------------------------------------------------------------------
-
auxdbkeys=[
'DEPEND', 'RDEPEND', 'SLOT', 'SRC_URI',
'RESTRICT', 'HOMEPAGE', 'LICENSE', 'DESCRIPTION',
@@ -5136,6 +5027,8 @@
def close_portdbapi_caches():
for i in portdbapi.portdbapi_instances:
i.close_caches()
+
+
class portdbapi(dbapi):
"""this tree will scan a portage directory located at root (passed to init)"""
portdbapi_instances = []
@@ -5178,33 +5071,34 @@
if self.tmpfs and not os.access(self.tmpfs, os.R_OK):
self.tmpfs = None
- self.eclassdb = eclass_cache(self.porttree_root, self.mysettings)
+ self.eclassdb = eclass_cache.cache(self.porttree_root, overlays=settings["PORTDIR_OVERLAY"].split())
self.metadb = {}
self.metadbmodule = self.mysettings.load_best_module("portdbapi.metadbmodule")
- self.auxdb = {}
- self.auxdbmodule = self.mysettings.load_best_module("portdbapi.auxdbmodule")
-
#if the portdbapi is "frozen", then we assume that we can cache everything (that no updates to it are happening)
self.xcache={}
self.frozen=0
self.porttrees=[self.porttree_root]+self.mysettings["PORTDIR_OVERLAY"].split()
+ self.auxdbmodule = self.mysettings.load_best_module("portdbapi.auxdbmodule")
+ self.auxdb = {}
+ # XXX: REMOVE THIS ONCE UNUSED_0 IS YANKED FROM auxdbkeys
+ # ~harring
+ filtered_auxdbkeys = filter(lambda x: not x.startswith("UNUSED_0"), auxdbkeys)
+ for x in self.porttrees:
+ # location, label, auxdbkeys
+ self.auxdb[x] = self.auxdbmodule(portage_const.DEPCACHE_PATH, x, filtered_auxdbkeys, gid=portage_gid)
+
def close_caches(self):
for x in self.auxdb.keys():
- for y in self.auxdb[x].keys():
- self.auxdb[x][y].sync()
- self.auxdb[x][y].close()
- del self.auxdb[x][y]
- del self.auxdb[x]
- self.eclassdb.close_caches()
+ self.auxdb[x].sync()
+ self.auxdb.clear()
def flush_cache(self):
self.metadb = {}
self.auxdb = {}
- self.eclassdb.flush_cache()
def finddigest(self,mycpv):
try:
@@ -5257,7 +5151,7 @@
# when not found
return None, 0
- def aux_get(self,mycpv,mylist,strict=0,metacachedir=None,debug=0):
+ def aux_get(self, mycpv, mylist):
"stub code for returning auxilliary db information, such as SLOT, DEPEND, etc."
'input: "sys-apps/foo-1.0",["SLOT","DEPEND","HOMEPAGE"]'
'return: ["0",">=sys-libs/bar-1.0","http://www.foo.com"] or raise KeyError if error'
@@ -5265,10 +5159,6 @@
cat,pkg = string.split(mycpv, "/", 1)
- if metacachedir:
- if cat not in self.metadb:
- self.metadb[cat] = self.metadbmodule(metacachedir,cat,auxdbkeys,uid,portage_gid)
-
myebuild, mylocation=self.findname2(mycpv)
if not myebuild:
@@ -5310,12 +5200,7 @@
raise portage_exception.SecurityViolation, "Error in verification of signatures: %(errormsg)s" % {"errormsg":str(e)}
writemsg("!!! Manifest is missing or inaccessable: %(manifest)s\n" % {"manifest":myManifestPath})
- if mylocation not in self.auxdb:
- self.auxdb[mylocation] = {}
- if not self.auxdb[mylocation].has_key(cat):
- self.auxdb[mylocation][cat] = self.auxdbmodule(self.depcachedir+"/"+mylocation,cat,auxdbkeys,uid,portage_gid)
-
if os.access(myebuild, os.R_OK):
emtime=os.stat(myebuild)[stat.ST_MTIME]
else:
@@ -5324,47 +5209,24 @@
raise KeyError
try:
- auxdb_is_valid = self.auxdb[mylocation][cat].has_key(pkg) and \
- self.auxdb[mylocation][cat][pkg].has_key("_mtime_") and \
- self.auxdb[mylocation][cat][pkg]["_mtime_"] == emtime
- except SystemExit, e:
- raise
- except Exception, e:
- auxdb_is_valid = 0
- if not metacachedir:
- writemsg("auxdb exception: [%(loc)s]: %(exception)s\n" % {"loc":mylocation+"::"+cat+"/"+pkg, "exception":str(e)})
- if self.auxdb[mylocation][cat].has_key(pkg):
- self.auxdb[mylocation][cat].del_key(pkg)
- self.auxdb[mylocation][cat].sync()
-
- writemsg("auxdb is valid: "+str(auxdb_is_valid)+" "+str(pkg)+"\n", 2)
- doregen = not (auxdb_is_valid and self.eclassdb.is_current(mylocation,cat,pkg,self.auxdb[mylocation][cat][pkg]["INHERITED"].split()))
-
- # when mylocation is not overlay directorys and metacachedir is set,
- # we use cache files, which is usually on /usr/portage/metadata/cache/.
- if doregen and mylocation==self.mysettings["PORTDIR"] and metacachedir and self.metadb[cat].has_key(pkg):
- metadata=self.metadb[cat][pkg]
-
- if "EAPI" not in metadata or not metadata["EAPI"].strip():
- metadata["EAPI"] = "0"
-
- if not eapi_is_supported(metadata["EAPI"]):
- # intentionally wipe keys.
- eapi = metadata["EAPI"]
- mtime = metadata.get("_mtime_", 0)
- metadata = {}
- map(lambda x: metadata.setdefault(x, ''), auxdbkeys)
- metadata["_mtime_"] = long(mtime)
- metadata["EAPI"] == "-"+eapi
-
+ mydata = self.auxdb[mylocation][mycpv]
+ if emtime != long(mydata.get("_mtime_", 0)):
+ doregen = True
+ elif len(mydata.get("_eclasses_", [])) > 0:
+ doregen = not self.eclassdb.is_eclass_data_valid(mydata["_eclasses_"])
else:
- # eclass updates only if we haven't nuked the entry.
- self.eclassdb.update_package(mylocation,cat,pkg,metadata["INHERITED"].split())
+ doregen = False
+
+ except KeyError:
+ doregen = True
+ except CacheError:
+ doregen = True
+ try: del self.auxdb[mylocation][mycpv]
+ except KeyError: pass
- self.auxdb[mylocation][cat][pkg] = metadata
- self.auxdb[mylocation][cat].sync()
+ writemsg("auxdb is valid: "+str(not doregen)+" "+str(pkg)+"\n", 2)
- elif doregen:
+ if doregen:
writemsg("doregen: %s %s\n" % (doregen,mycpv), 2)
writemsg("Generating cache entry(0) for: "+str(myebuild)+"\n",1)
@@ -5385,9 +5247,7 @@
if os.path.exists(mydbkey):
try:
os.unlink(mydbkey)
- except SystemExit, e:
- raise
- except Exception, e:
+ except (IOError, OSError), e:
portage_locks.unlockfile(mylock)
self.lock_held = 0
writemsg("Uncaught handled exception: %(exception)s\n" % {"exception":str(e)})
@@ -5407,19 +5267,13 @@
os.unlink(mydbkey)
mylines=mycent.readlines()
mycent.close()
- except SystemExit, e:
- raise
+
except (IOError, OSError):
portage_locks.unlockfile(mylock)
self.lock_held = 0
writemsg(str(red("\naux_get():")+" (1) Error in "+mycpv+" ebuild.\n"
" Check for syntax error or corruption in the ebuild. (--debug)\n\n"))
raise KeyError
- except Exception, e:
- portage_locks.unlockfile(mylock)
- self.lock_held = 0
- writemsg("Uncaught handled exception: %(exception)s\n" % {"exception":str(e)})
- raise
portage_locks.unlockfile(mylock)
self.lock_held = 0
@@ -5440,18 +5294,24 @@
map(lambda x:mydata.setdefault(x, ""), auxdbkeys)
mydata["EAPI"] = "-"+eapi
+ if mydata.get("INHERITED", False):
+ mydata["_eclasses_"] = self.eclassdb.get_eclass_data(mydata["INHERITED"].split())
+ else:
+ mydata["_eclasses_"] = {}
+
+ del mydata["INHERITED"]
+
mydata["_mtime_"] = emtime
- self.auxdb[mylocation][cat][pkg] = mydata
- self.auxdb[mylocation][cat].sync()
- if not self.eclassdb.update_package(mylocation, cat, pkg, mylines[auxdbkeys.index("INHERITED")].split()):
- sys.exit(1)
+ self.auxdb[mylocation][mycpv] = mydata
#finally, we look at our internal cache entry and return the requested data.
- mydata = self.auxdb[mylocation][cat][pkg]
returnme = []
for x in mylist:
- returnme.append(mydata.get(x,""))
+ if x == "INHERITED":
+ returnme.append(' '.join(mydata.get("_eclasses_", {}).keys()))
+ else:
+ returnme.append(mydata.get(x,""))
if "EAPI" in mylist:
idx = mylist.index("EAPI")
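
The hunk above collapses the old validity dance into two checks: the cached _mtime_ must equal the ebuild's mtime, and any recorded _eclasses_ data must still pass eclassdb.is_eclass_data_valid(). A minimal sketch of that flow, assuming stand-in names (entry_is_stale, cache, eclassdb and ebuild_mtime are illustrative, and CacheError is stubbed rather than imported):

class CacheError(Exception):
    # stand-in for the cache subsystem's CacheError exception
    pass

def entry_is_stale(cache, eclassdb, cpv, ebuild_mtime):
    # restates the doregen logic above under the naming assumptions
    # stated in the text; not a verbatim excerpt of the patch
    try:
        mydata = cache[cpv]
    except KeyError:
        return True          # never cached
    except CacheError:
        try:                 # corrupt entry: drop it and regenerate
            del cache[cpv]
        except KeyError:
            pass
        return True
    if ebuild_mtime != long(mydata.get("_mtime_", 0)):
        return True          # ebuild changed on disk
    eclasses = mydata.get("_eclasses_", {})
    if eclasses:
        # stale if any inherited eclass moved trees or was modified
        return not eclassdb.is_eclass_data_valid(eclasses)
    return False
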
Index: pym/eclass_cache.py
===================================================================
--- pym/eclass_cache.py (revision 0)
+++ pym/eclass_cache.py (revision 0)
@@ -0,0 +1,75 @@
+# Copyright: 2005 Gentoo Foundation
+# Author(s): Nicholas Carpaski (carpaski@gentoo.org), Brian Harring (ferringb@gentoo.org)
+# License: GPL2
+# $Id:$
+
+from portage_util import writemsg
+import portage_file
+import os, sys
+from portage_data import portage_gid
+
+class cache:
+	"""
+	Maintains the cache information about eclasses used in ebuilds.
+	"""
+	def __init__(self, porttree_root, overlays=[]):
+		self.porttree_root = porttree_root
+
+		self.eclasses = {} # {"Name": ("location","_mtime_")}
+
+		# screw with the porttree ordering, w/out having bash inherit match it, and I'll hurt you.
+		# ~harring
+		self.porttrees = [self.porttree_root]+overlays
+		self.porttrees = tuple(map(portage_file.normpath, self.porttrees))
+		self._master_eclass_root = os.path.join(self.porttrees[0],"eclass")
+		self.update_eclasses()
+
+	def close_caches(self):
+		import traceback
+		traceback.print_stack()
+		print "%s close_cache is deprecated" % self.__class__
+		self.eclasses.clear()
+
+	def flush_cache(self):
+		import traceback
+		traceback.print_stack()
+		print "%s flush_cache is deprecated" % self.__class__
+
+		self.update_eclasses()
+
+	def update_eclasses(self):
+		self.eclasses = {}
+		eclass_len = len(".eclass")
+		for x in [portage_file.normpath(os.path.join(y,"eclass")) for y in self.porttrees]:
+			if not os.path.isdir(x):
+				continue
+			for y in [y for y in os.listdir(x) if y.endswith(".eclass")]:
+				try:
+					mtime=os.stat(x+"/"+y).st_mtime
+				except OSError:
+					continue
+				ys=y[:-eclass_len]
+				self.eclasses[ys] = (x, long(mtime))
+
+	def is_eclass_data_valid(self, ec_dict):
+		if not isinstance(ec_dict, dict):
+			return False
+		for eclass, tup in ec_dict.iteritems():
+			if eclass not in self.eclasses or tuple(tup) != self.eclasses[eclass]:
+				return False
+
+		return True
+
+	def get_eclass_data(self, inherits, from_master_only=False):
+		ec_dict = {}
+		for x in inherits:
+			try:
+				ec_dict[x] = self.eclasses[x]
+			except KeyError:
+				print "ec=",ec_dict
+				print "inherits=",inherits
+				raise
+			if from_master_only and self.eclasses[x][0] != self._master_eclass_root:
+				return None
+
+		return ec_dict
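
A usage sketch for the class above, assuming illustrative inputs (the tree paths and eclass names are examples and must exist on disk for the calls to succeed; regenerate_entry is a stub for the caller's regeneration path):

import eclass_cache

def regenerate_entry():
    # placeholder: whatever the caller does to rebuild a cache entry
    pass

ec = eclass_cache.cache("/usr/portage", overlays=["/usr/local/portage"])

# at regen time, snapshot (location, mtime) pairs for the ebuild's inherits
snapshot = ec.get_eclass_data(["eutils", "flag-o-matic"])

# the snapshot is what aux_get stores as _eclasses_; on later lookups a
# single call decides whether the cached metadata is still trustworthy
if not ec.is_eclass_data_valid(snapshot):
    regenerate_entry()
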
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
* [gentoo-portage-dev] [3/3] Cache subsystem rewrite
2005-10-30 17:05 [gentoo-portage-dev] [0/3] Cache subsystem rewrite Brian Harring
2005-10-30 17:14 ` [gentoo-portage-dev] [1/3] " Brian Harring
2005-10-30 17:27 ` [gentoo-portage-dev] [2/3] " Brian Harring
@ 2005-10-30 17:27 ` Brian Harring
2 siblings, 0 replies; 8+ messages in thread
From: Brian Harring @ 2005-10-30 17:27 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1.1: Type: text/plain, Size: 55 bytes --]
Removal of the old cache modules; they are superseded by the new cache subsystem in [1/3].
'nuff said.
~harring
[-- Attachment #1.2: remove-old-cache.patch --]
[-- Type: text/plain, Size: 15210 bytes --]
Index: pym/portage_db_cpickle.py
===================================================================
--- pym/portage_db_cpickle.py (revision 2121)
+++ pym/portage_db_cpickle.py (working copy)
@@ -1,79 +0,0 @@
-# Copyright 2004 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-# $Id: /var/cvsroot/gentoo-src/portage/pym/Attic/portage_db_cpickle.py,v 1.9.2.2 2005/04/23 07:26:04 jstubbs Exp $
-
-
-import anydbm,cPickle,types
-from os import chown,access,R_OK,unlink
-import os
-
-import portage_db_template
-
-class database(portage_db_template.database):
- def module_init(self):
- self.modified = False
-
- prevmask=os.umask(0)
- if not os.path.exists(self.path):
- os.makedirs(self.path, 02775)
-
- self.filename = self.path + "/" + self.category + ".cpickle"
-
- if access(self.filename, R_OK):
- try:
- mypickle=cPickle.Unpickler(open(self.filename,"r"))
- mypickle.find_global=None
- self.db = mypickle.load()
- except SystemExit, e:
- raise
- except:
- self.db = {}
- else:
- self.db = {}
-
- os.umask(prevmask)
-
- def has_key(self,key):
- self.check_key(key)
- if self.db.has_key(key):
- return 1
- return 0
-
- def keys(self):
- return self.db.keys()
-
- def get_values(self,key):
- self.check_key(key)
- if self.db.has_key(key):
- return self.db[key]
- return None
-
- def set_values(self,key,val):
- self.modified = True
- self.check_key(key)
- self.db[key] = val
-
- def del_key(self,key):
- if self.has_key(key):
- del self.db[key]
- self.modified = True
- return True
- return False
-
- def sync(self):
- if self.modified:
- try:
- if os.path.exists(self.filename):
- unlink(self.filename)
- cPickle.dump(self.db, open(self.filename,"w"), -1)
- os.chown(self.filename,self.uid,self.gid)
- os.chmod(self.filename, 0664)
- except SystemExit, e:
- raise
- except:
- pass
-
- def close(self):
- self.sync()
- self.db = None;
-
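
The module above kept an entire category in a single pickle and rewrote the file on sync(). A toy restatement of that pattern, not portage code (ownership and permission handling is omitted, and the rename-into-place is an assumption of mine; the original unlinked and rewrote directly):

import cPickle, os

def load_category(filename):
    # one dict per category; any unreadable pickle just starts fresh,
    # as the deleted module did
    if os.access(filename, os.R_OK):
        try:
            return cPickle.load(open(filename, "rb"))
        except Exception:
            return {}
    return {}

def sync_category(filename, db):
    # rewrite the whole category file in one shot
    tmp = filename + ".update"
    cPickle.dump(db, open(tmp, "wb"), -1)
    os.rename(tmp, filename)
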
Index: pym/portage_db_anydbm.py
===================================================================
--- pym/portage_db_anydbm.py (revision 2121)
+++ pym/portage_db_anydbm.py (working copy)
@@ -1,64 +0,0 @@
-# Copyright 2004 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-# $Id: /var/cvsroot/gentoo-src/portage/pym/Attic/portage_db_anydbm.py,v 1.11.2.1 2005/01/16 02:35:33 carpaski Exp $
-
-
-import anydbm,cPickle,types,os
-
-import portage_db_template
-
-class database(portage_db_template.database):
- def module_init(self):
- prevmask=os.umask(0)
- if not os.path.exists(self.path):
- current_path="/"
- for mydir in self.path.split("/"):
- current_path += "/"+mydir
- if not os.path.exists(current_path):
- os.mkdir(current_path)
-
- self.filename = self.path + "/" + self.category + ".anydbm"
-
- try:
- # open it read/write
- self.db = anydbm.open(self.filename, "c", 0664)
- except SystemExit, e:
- raise
- except:
- # Create a new db... DB type not supported anymore?
- self.db = anydbm.open(self.filename, "n", 0664)
-
- os.umask(prevmask)
-
- def has_key(self,key):
- self.check_key(key)
- if self.db.has_key(key):
- return 1
- return 0
-
- def keys(self):
- return self.db.keys()
-
- def get_values(self,key):
- self.check_key(key)
- if self.db.has_key(key):
- myval = cPickle.loads(self.db[key])
- return myval
- return None
-
- def set_values(self,key,val):
- self.check_key(key)
- self.db[key] = cPickle.dumps(val,cPickle.HIGHEST_PROTOCOL)
-
- def del_key(self,key):
- if self.has_key(key):
- del self.db[key]
- return True
- return False
-
- def sync(self):
- self.db.sync()
-
- def close(self):
- self.db.close()
-
Index: pym/portage_db_template.py
===================================================================
--- pym/portage_db_template.py (revision 2121)
+++ pym/portage_db_template.py (working copy)
@@ -1,174 +0,0 @@
-# Copyright 2004 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-# $Id: /var/cvsroot/gentoo-src/portage/pym/Attic/portage_db_template.py,v 1.11.2.1 2005/01/16 02:35:33 carpaski Exp $
-
-
-import os.path,string
-from portage_util import getconfig, ReadOnlyConfig
-from portage_exception import CorruptionError
-
-class database:
- def __init__(self,path,category,dbkeys,uid,gid,config_path="/etc/portage/module_configs/"):
- self.__cacheArray = [None, None, None]
- self.__cacheKeyArray = [None, None, None]
- self.__template_init_called = True
- self.path = path
- self.category = category
- self.dbkeys = dbkeys
- self.uid = uid
- self.gid = gid
-
- self.config = None
- self.__load_config(config_path)
-
- self.module_init()
-
- def getModuleName(self):
- return self.__module__+"."+self.__class__.__name__[:]
-
- def __load_config(self,config_path):
- config_file = config_path + "/" + self.getModuleName()
- self.config = ReadOnlyConfig(config_file)
-
- def __check_init(self):
- try:
- if self.__template_init_called:
- pass
- except SystemExit, e:
- raise
- except:
- raise NotImplementedError("db_template.__init__ was overridden")
-
- def check_key(self,key):
- if (not key) or not isinstance(key, str):
- raise KeyError, "No key provided. key: %s" % (key)
-
- def clear(self):
- for x in self.keys():
- self.del_key(x)
-
- def __addCache(self,key,val):
- del self.__cacheArray[2]
- self.__cacheArray.insert(0,val)
- del self.__cacheKeyArray[2]
- self.__cacheKeyArray.insert(0,key)
-
- def __delCache(self,key):
- i = self.__cacheKeyArray.index(key)
- self.__cacheArray[i] = None
- self.__cacheKeyArray[i] = None
-
- def flushCache(self):
- self.__cacheArray = [None, None, None]
- self.__cacheKeyArray = [None, None, None]
-
- def __getitem__(self,key):
- if key in self.__cacheKeyArray:
- i = self.__cacheKeyArray.index(key)
- return self.__cacheArray[i]
-
- self.check_key(key)
- if self.has_key(key):
- try:
- values = self.get_values(key)
- self.__addCache(key,values)
- return values
- except SystemExit, e:
- raise
- except Exception, e:
- raise CorruptionError("Corruption detected when reading key '%s': %s" % (key,str(e)))
- raise KeyError("Key not in db: '%s'" % (key))
-
- def __setitem__(self,key,values):
- self.check_key(key)
- self.__addCache(key,values)
- return self.set_values(key,values)
-
- def __delitem__(self,key):
- self.__delCache(key)
- return self.del_key(key)
-
- def has_key(self,key):
- raise NotImplementedError("Method not defined")
-
- def keys(self):
- raise NotImplementedError("Method not defined")
-
- def get_values(self,key):
- raise NotImplementedError("Method not defined")
-
- def set_values(self,key,val):
- raise NotImplementedError("Method not defined")
-
- def del_key(self,key):
- raise NotImplementedError("Method not defined")
-
- def sync(self):
- raise NotImplementedError("Method not defined")
-
- def close(self):
- raise NotImplementedError("Method not defined")
-
-
-
-def test_database(db_class,path,category,dbkeys,uid,gid):
- if "_mtime_" not in dbkeys:
- dbkeys+=["_mtime_"]
- d = db_class(path,category,dbkeys,uid,gid)
-
- print "Module: "+str(d.__module__)
-
- # XXX: Need a way to do this that actually works.
- for x in dir(database):
- if x not in dir(d):
- print "FUNCTION MISSING:",str(x)
-
- list = d.keys()
- if(len(list) == 0):
- values = {}
- for x in dbkeys:
- values[x] = x[:]
- values["_mtime_"] = "1079903037"
- d.set_values("test-2.2.3-r1", values)
- d.set_values("test-2.2.3-r2", values)
- d.set_values("test-2.2.3-r3", values)
- d.set_values("test-2.2.3-r4", values)
-
- list = d.keys()
- print "Key count:",len(list)
-
- values = d.get_values(list[0])
- print "value count:",len(values)
-
- mykey = "foobar-1.2.3-r4"
-
- d.check_key(mykey)
- d.set_values(mykey, values)
- d.sync()
- del d
-
- d = db_class(path,category,dbkeys,uid,gid)
- new_vals = d.get_values(mykey)
-
- if dbkeys and new_vals:
- for x in dbkeys:
- if x not in new_vals.keys():
- print "---",x
- for x in new_vals.keys():
- if x not in dbkeys:
- print "+++",x
- else:
- print "Mismatched:",dbkeys,new_vals
-
- d.del_key(mykey)
-
- print "Should be None:",d.get_values(mykey)
-
- d.clear()
-
- d.sync
- d.close
-
- del d
-
- print "Done."
Index: pym/portage_db_test.py
===================================================================
--- pym/portage_db_test.py (revision 2121)
+++ pym/portage_db_test.py (working copy)
@@ -1,21 +0,0 @@
-#!/usr/bin/python -O
-# Copyright 2004 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-# $Id: /var/cvsroot/gentoo-src/portage/pym/Attic/portage_db_test.py,v 1.3.2.1 2005/01/16 02:35:33 carpaski Exp $
-
-
-import portage
-import portage_db_template
-import portage_db_anydbm
-import portage_db_flat
-import portage_db_cpickle
-
-import os
-
-uid = os.getuid()
-gid = os.getgid()
-
-portage_db_template.test_database(portage_db_flat.database,"/var/cache/edb/dep", "sys-apps",portage.auxdbkeys,uid,gid)
-portage_db_template.test_database(portage_db_cpickle.database,"/var/cache/edb/dep","sys-apps",portage.auxdbkeys,uid,gid)
-portage_db_template.test_database(portage_db_anydbm.database,"/var/cache/edb/dep", "sys-apps",portage.auxdbkeys,uid,gid)
-
Index: pym/portage_db_metadata.py
===================================================================
--- pym/portage_db_metadata.py (revision 2121)
+++ pym/portage_db_metadata.py (working copy)
@@ -1,49 +0,0 @@
-# Copyright 2004 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-# $Header: /var/cvsroot/gentoo-src/portage/pym/Attic/portage_db_flat.py,v 1.13.2.6 2005/04/19 07:14:17 ferringb Exp $
-cvs_id_string="$Id: portage_db_flat.py,v 1.13.2.6 2005/04/19 07:14:17 ferringb Exp $"[5:-2]
-
-import os, portage_db_flat_hash, portage_db_flat
-
-class database(portage_db_flat_hash.database):
-
- def get_values(self, key):
- if not key:
- raise KeyError("key is not valid")
-
- try:
- myf = open(self.fullpath + key, "r")
- except OSError:
- raise KeyError("key is not valid")
- mtime = os.fstat(myf.fileno()).st_mtime
- data = myf.read().splitlines()
-
- # easy attempt first.
- if len(data) != portage_db_flat.magic_line_count:
- d = dict(map(lambda x: x.split("=",1), data))
- d["_mtime_"] = mtime
- return portage_db_flat_hash.database.get_values(self, key, d)
- # this one's interesting.
- d = {}
-
- for line in data:
- # yes, meant to iterate over a string.
- hashed = False
- for idx, c in enumerate(line):
- if not c.isalpha():
- if c == "=" and idx > 0:
- hashed = True
- d[line[:idx]] = line[idx + 1:]
- elif c == "_" or c.isdigit():
- continue
- break
- elif not c.isupper():
- break
-
- if not hashed:
- # non hashed.
- data.append(mtime)
- return portage_db_flat.database.get_values(self, key, data=data)
-
- d["_mtime_"] = mtime
- return portage_db_flat_hash.database.get_values(self, key, data=d)
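
The character-scanning loop above decides whether a line is already in the hashed KEY=value layout. Restated as a standalone predicate with my own naming, same logic:

def is_hashed_line(line):
    # True only if everything before the first '=' looks like an
    # uppercase identifier (A-Z, digits, '_'), i.e. "KEY=value"
    for idx, c in enumerate(line):
        if not c.isalpha():
            if c == "=" and idx > 0:
                return True
            if c == "_" or c.isdigit():
                continue
            return False
        elif not c.isupper():
            return False
    return False
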
Index: pym/portage_db_flat.py
===================================================================
--- pym/portage_db_flat.py (revision 2121)
+++ pym/portage_db_flat.py (working copy)
@@ -1,124 +0,0 @@
-# Copyright 2004 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-# $Id: /var/cvsroot/gentoo-src/portage/pym/Attic/portage_db_flat.py,v 1.13.2.6 2005/04/19 07:14:17 ferringb Exp $
-
-
-import types
-import os
-import stat
-
-import portage_db_template
-
-# since this format is massively deprecated,
-# we're hardcoding the previously weird line count
-magic_line_count = 22
-
-class database(portage_db_template.database):
- def module_init(self):
- self.lastkey = None # Cache
- self.lastval = None # Cache
-
- self.fullpath = self.path + "/" + self.category + "/"
-
- if not os.path.exists(self.fullpath):
- prevmask=os.umask(0)
- os.makedirs(self.fullpath, 02775)
- os.umask(prevmask)
- try:
- os.chown(self.fullpath, self.uid, self.gid)
- os.chmod(self.fullpath, 02775)
- except SystemExit, e:
- raise
- except:
- pass
-
- def has_key(self,key):
- if os.path.exists(self.fullpath+key):
- return 1
- return 0
-
- def keys(self):
- # XXX: NEED TOOLS SEPERATED
- # return portage.listdir(self.fullpath,filesonly=1)
- mykeys = []
- for x in os.listdir(self.fullpath):
- if os.path.isfile(self.fullpath+x) and not x.startswith(".update."):
- mykeys += [x]
- return mykeys
-
- def get_values(self,key, data=None):
- """ do not use data unless you know what it does."""
-
- if not key:
- raise KeyError, "key is not set to a valid value"
-
- mydict = {}
- if data == None:
- try:
- # give buffering a hint of the pretty much maximal cache size we deal with
- myf = open(self.fullpath+key, "r", 8192)
- except OSError:
- # either the file didn't exist, or it was removed under our feet.
- raise KeyError("failed reading key")
-
- # nuke the newlines right off the batt.
- data = myf.read().splitlines()
- mydict["_mtime_"] = os.fstat(myf.fileno()).st_mtime
- myf.close()
- else:
- mydict["_mtime_"] = data.pop(-1)
-
- # rely on exceptions to note differing line counts.
- try:
- for x in range(magic_line_count):
- mydict[self.dbkeys[x]] = data[x]
-
- except IndexError:
- raise ValueError, "Key count mistmatch"
-
- return mydict
-
- def set_values(self,key, val, raw=False):
- if not key:
- raise KeyError, "No key provided. key:%s val:%s" % (key,val)
- if not val:
- raise ValueError, "No value provided. key:%s val:%s" % (key,val)
-
- # XXX threaded cache updates won't play nice with this.
- # need a synchronization primitive, or locking (of the fileno, not a seperate file)
- # to correctly handle threading.
-
- update_fp = self.fullpath + ".update." + str(os.getpid()) + "." + key
- myf = open(update_fp,"w")
- if not raw:
- myf.writelines( [ str(val[x]) +"\n" for x in self.dbkeys] )
- if len(self.dbkeys) != magic_line_count:
- myf.writelines(["\n"] * len(self.dbkeys) - magic_line_count)
- mtime = val["_mtime_"]
- else:
- mtime = val.pop(-1)
- myf.writelines(val)
- myf.close()
-
- os.chown(update_fp, self.uid, self.gid)
- os.chmod(update_fp, 0664)
- os.utime(update_fp, (-1,long(mtime)))
- os.rename(update_fp, self.fullpath+key)
-
- def del_key(self,key):
- try:
- os.unlink(self.fullpath+key)
- except OSError, oe:
- # just attempt it without checking, due to the fact that
- # a cache update could be in progress.
- self.lastkey = None
- self.lastval = None
- return 0
- return 1
-
- def sync(self):
- return
-
- def close(self):
- return
-
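
For reference, the deprecated layout this module read: exactly magic_line_count lines, line N holding the value of dbkeys[N], with the file's own mtime doubling as the validity stamp (stamped via os.utime at write time). A minimal reader in the same spirit (read_flat_entry is my name, not portage's):

import os

MAGIC_LINE_COUNT = 22   # the hardcoded line count noted above

def read_flat_entry(path, dbkeys):
    # positional format: line N is the value of dbkeys[N]; the file's
    # mtime serves as the cache validity stamp
    f = open(path, "r")
    try:
        lines = f.read().splitlines()
        mtime = os.fstat(f.fileno()).st_mtime
    finally:
        f.close()
    if len(lines) != MAGIC_LINE_COUNT:
        raise ValueError("key count mismatch")
    d = dict(zip(dbkeys, lines))
    d["_mtime_"] = long(mtime)
    return d
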
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]