public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Zac Medico" <zmedico@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/portage:master commit in: pym/portage/dbapi/, pym/portage/, pym/portage/dep/, pym/_emerge/
Date: Sat, 22 Sep 2012 22:05:42 +0000 (UTC)	[thread overview]
Message-ID: <1348351531.0d5b0fbd79ba8b2e7dd5d2f2db7d69cad3e56766.zmedico@gentoo> (raw)

commit:     0d5b0fbd79ba8b2e7dd5d2f2db7d69cad3e56766
Author:     Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Sat Sep 22 21:52:35 2012 +0000
Commit:     Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Sat Sep 22 22:05:31 2012 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=0d5b0fbd

Use re.UNICODE for category/package name regexes.

This only affects r'\w' with Python 2.x, since Python 3 already
defaults to re.UNICODE behavior when compiling unicode str objects
(unless re.ASCII is specified). If a repository wants to ban unicode
category/package names then we can add a layout.conf setting for that,
as discussed in bug #435934.

---
 pym/_emerge/is_valid_package_atom.py |    4 ++--
 pym/portage/dbapi/__init__.py        |    2 +-
 pym/portage/dep/__init__.py          |   10 +++++-----
 pym/portage/manifest.py              |   16 +++++++++++++++-
 pym/portage/versions.py              |    4 ++--
 5 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/pym/_emerge/is_valid_package_atom.py b/pym/_emerge/is_valid_package_atom.py
index 7cb2a5b..a1e4294 100644
--- a/pym/_emerge/is_valid_package_atom.py
+++ b/pym/_emerge/is_valid_package_atom.py
@@ -1,11 +1,11 @@
-# Copyright 1999-2011 Gentoo Foundation
+# Copyright 1999-2012 Gentoo Foundation
 # Distributed under the terms of the GNU General Public License v2
 
 import re
 from portage.dep import isvalidatom
 
 def insert_category_into_atom(atom, category):
-	alphanum = re.search(r'\w', atom)
+	alphanum = re.search(r'\w', atom, re.UNICODE)
 	if alphanum:
 		ret = atom[:alphanum.start()] + "%s/" % category + \
 			atom[alphanum.start():]

diff --git a/pym/portage/dbapi/__init__.py b/pym/portage/dbapi/__init__.py
index ad22f39..fc7c7eb 100644
--- a/pym/portage/dbapi/__init__.py
+++ b/pym/portage/dbapi/__init__.py
@@ -21,7 +21,7 @@ from portage.exception import InvalidData
 from portage.localization import _
 
 class dbapi(object):
-	_category_re = re.compile(r'^\w[-.+\w]*$')
+	_category_re = re.compile(r'^\w[-.+\w]*$', re.UNICODE)
 	_categories = None
 	_use_mutable = False
 	_known_keys = frozenset(x for x in auxdbkeys

diff --git a/pym/portage/dep/__init__.py b/pym/portage/dep/__init__.py
index b4b240d..6e03004 100644
--- a/pym/portage/dep/__init__.py
+++ b/pym/portage/dep/__init__.py
@@ -72,7 +72,7 @@ def _get_slot_re(eapi_attrs):
 	else:
 		slot_re = _slot
 
-	slot_re = re.compile('^' + slot_re + '$', re.VERBOSE)
+	slot_re = re.compile('^' + slot_re + '$', re.VERBOSE | re.UNICODE)
 
 	_slot_re_cache[cache_key] = slot_re
 	return slot_re
@@ -90,7 +90,7 @@ def _get_slot_dep_re(eapi_attrs):
 	else:
 		slot_re = _slot
 
-	slot_re = re.compile('^' + slot_re + '$', re.VERBOSE)
+	slot_re = re.compile('^' + slot_re + '$', re.VERBOSE | re.UNICODE)
 
 	_slot_dep_re_cache[cache_key] = slot_re
 	return slot_re
@@ -123,7 +123,7 @@ def _get_atom_re(eapi_attrs):
 		'(?P<star>=' + cpv_re + r'\*)|' +
 		'(?P<simple>' + cp_re + '))' + 
 		'(' + _slot_separator + _slot_loose + ')?' +
-		_repo + ')(' + _use + ')?$', re.VERBOSE)
+		_repo + ')(' + _use + ')?$', re.VERBOSE | re.UNICODE)
 
 	_atom_re_cache[cache_key] = atom_re
 	return atom_re
@@ -145,7 +145,7 @@ def _get_atom_wildcard_re(eapi_attrs):
 		_extended_cat + r')/(' + pkg_re + r'))' + \
 		'|(?P<star>=((' + _extended_cat + r')/(' + pkg_re + r'))-(?P<version>\*\d+\*)))' + \
 		'(:(?P<slot>' + _slot_loose + r'))?(' +
-		_repo_separator + r'(?P<repo>' + _repo_name + r'))?$')
+		_repo_separator + r'(?P<repo>' + _repo_name + r'))?$', re.UNICODE)
 
 	_atom_wildcard_re_cache[cache_key] = atom_re
 	return atom_re
@@ -1585,7 +1585,7 @@ def extended_cp_match(extended_cp, other_cp):
 	extended_cp_re = _extended_cp_re_cache.get(extended_cp)
 	if extended_cp_re is None:
 		extended_cp_re = re.compile("^" + re.escape(extended_cp).replace(
-			r'\*', '[^/]*') + "$")
+			r'\*', '[^/]*') + "$", re.UNICODE)
 		_extended_cp_re_cache[extended_cp] = extended_cp_re
 	return extended_cp_re.match(other_cp) is not None
 

diff --git a/pym/portage/manifest.py b/pym/portage/manifest.py
index b2f1ff2..25886bb 100644
--- a/pym/portage/manifest.py
+++ b/pym/portage/manifest.py
@@ -4,6 +4,7 @@
 import errno
 import io
 import re
+import sys
 import warnings
 
 import portage
@@ -24,6 +25,11 @@ from portage.const import (MANIFEST1_HASH_FUNCTIONS, MANIFEST2_HASH_DEFAULTS,
 	MANIFEST2_HASH_FUNCTIONS, MANIFEST2_IDENTIFIERS, MANIFEST2_REQUIRED_HASH)
 from portage.localization import _
 
+if sys.hexversion >= 0x3000000:
+	_unicode = str
+else:
+	_unicode = unicode
+
 # Characters prohibited by repoman's file.name check.
 _prohibited_filename_chars_re = re.compile(r'[^a-zA-Z0-9._\-+:]')
 
@@ -108,6 +114,14 @@ class Manifest2Entry(ManifestEntry):
 	def __ne__(self, other):
 		return not self.__eq__(other)
 
+	if sys.hexversion < 0x3000000:
+
+		__unicode__ = __str__
+
+		def __str__(self):
+			return _unicode_encode(self.__unicode__(),
+				encoding=_encodings['repo.content'], errors='strict')
+
 class Manifest(object):
 	parsers = (parseManifest2,)
 	def __init__(self, pkgdir, distdir, fetchlist_dict=None,
@@ -289,7 +303,7 @@ class Manifest(object):
 					# thin manifests with no DIST entries, myentries is
 					# non-empty for all currently known use cases.
 					write_atomic(self.getFullname(), "".join("%s\n" %
-						str(myentry) for myentry in myentries))
+						_unicode(myentry) for myentry in myentries))
 				else:
 					# With thin manifest, there's no need to have
 					# a Manifest file if there are no DIST entries.

diff --git a/pym/portage/versions.py b/pym/portage/versions.py
index 242623f..a9b7e64 100644
--- a/pym/portage/versions.py
+++ b/pym/portage/versions.py
@@ -79,7 +79,7 @@ def _get_pv_re(eapi_attrs):
 	else:
 		pv_re = _pv['dots_disallowed_in_PN']
 
-	pv_re = re.compile('^' + pv_re + '$', re.VERBOSE)
+	pv_re = re.compile(_unicode_decode('^' + pv_re + '$'), re.VERBOSE | re.UNICODE)
 
 	_pv_re_cache[cache_key] = pv_re
 	return pv_re
@@ -292,7 +292,7 @@ def _pkgsplit(mypkg, eapi=None):
 
 	return  (m.group('pn'), m.group('ver'), rev) 
 
-_cat_re = re.compile('^%s$' % _cat)
+_cat_re = re.compile('^%s$' % _cat, re.UNICODE)
 _missing_cat = 'null'
 
 def catpkgsplit(mydata, silent=1, eapi=None):


             reply	other threads:[~2012-09-22 22:05 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-22 22:05 Zac Medico [this message]
  -- strict thread matches above, loose matches on Subject: below --
2012-05-13 23:40 [gentoo-commits] proj/portage:master commit in: pym/portage/dbapi/, pym/portage/, pym/portage/dep/, pym/_emerge/ Zac Medico
2012-05-13  8:36 Zac Medico
2012-05-12  9:38 Zac Medico

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1348351531.0d5b0fbd79ba8b2e7dd5d2f2db7d69cad3e56766.zmedico@gentoo \
    --to=zmedico@gentoo.org \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox