From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) by finch.gentoo.org (Postfix) with ESMTP id 960B713800E for ; Tue, 7 Aug 2012 08:50:41 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 8478EE06F3; Tue, 7 Aug 2012 08:50:29 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) by pigeon.gentoo.org (Postfix) with ESMTP id 57354E06F3 for ; Tue, 7 Aug 2012 08:50:29 +0000 (UTC) Received: from hornbill.gentoo.org (hornbill.gentoo.org [94.100.119.163]) (using TLSv1 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by smtp.gentoo.org (Postfix) with ESMTPS id ADE051B403F for ; Tue, 7 Aug 2012 08:50:28 +0000 (UTC) Received: from localhost.localdomain (localhost [127.0.0.1]) by hornbill.gentoo.org (Postfix) with ESMTP id 4D968E5443 for ; Tue, 7 Aug 2012 08:50:26 +0000 (UTC) From: "André Erdmann" To: gentoo-commits@lists.gentoo.org Content-Transfer-Encoding: 8bit Content-type: text/plain; charset=UTF-8 Reply-To: gentoo-dev@lists.gentoo.org, "André Erdmann" Message-ID: <1344329335.d0ed3e90f77542f1ba23d717f87ca850c585ec81.dywi@gentoo> Subject: [gentoo-commits] proj/R_overlay:master commit in: roverlay/ X-VCS-Repository: proj/R_overlay X-VCS-Files: roverlay/strutil.py X-VCS-Directories: roverlay/ X-VCS-Committer: dywi X-VCS-Committer-Name: André Erdmann X-VCS-Revision: d0ed3e90f77542f1ba23d717f87ca850c585ec81 X-VCS-Branch: master Date: Tue, 7 Aug 2012 08:50:26 +0000 (UTC) Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-commits@lists.gentoo.org X-Archives-Salt: c94bd4b5-0d20-4420-898c-59b9ff1f6db3 X-Archives-Hash: 962004f8080c93c012789e779cb60bf4 commit: d0ed3e90f77542f1ba23d717f87ca850c585ec81 Author: André Erdmann mailerd de> AuthorDate: Tue Aug 7 08:48:55 2012 +0000 Commit: André Erdmann mailerd de> CommitDate: Tue Aug 7 08:48:55 2012 +0000 URL: http://git.overlays.gentoo.org/gitweb/?p=proj/R_overlay.git;a=commit;h=d0ed3e90 strutil: bytes_try_decode() --- roverlay/strutil.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 47 insertions(+), 2 deletions(-) diff --git a/roverlay/strutil.py b/roverlay/strutil.py index 3228df6..4bf3584 100644 --- a/roverlay/strutil.py +++ b/roverlay/strutil.py @@ -6,13 +6,15 @@ """provides utility functions for string manipulation""" -__all__ = [ 'ascii_filter', 'fix_ebuild_name', +__all__ = [ 'ascii_filter', 'bytes_try_decode', 'fix_ebuild_name', 'pipe_lines', 'shorten_str', 'unquote' ] import re -_EBUILD_NAME_ILLEGAL_CHARS = re.compile ( "[.:]{1,}" ) +_DEFAULT_ENCODINGS = ( 'utf-8', 'ascii', 'iso8859_15', 'utf-16', 'latin_1' ) + +_EBUILD_NAME_ILLEGAL_CHARS = re.compile ( "[.:]{1,}" ) _EBUILD_NAME_ILLEGAL_CHARS_REPLACE_BY = '_' def fix_ebuild_name ( name ): @@ -88,3 +90,46 @@ def unquote ( _str, keep_going=False): return _str # --- end of unquote (...) --- + +def bytes_try_decode ( + byte_str, + encodings=_DEFAULT_ENCODINGS, + charwise_only=False, + force_decode=False +): + """Tries to decode a bytes object to str whose encoding is unknown + but predictable (with charwise conversion as last resort). + Returns byte_str if byte_str is already a str and force_decode is False, + else a decoded str. + + arguments: + * byte_str -- bytes object to decode + * encodings -- encodings to try (None, str or list/iterable of str) + * charwise_only -- do charwise conversion only + * force_decode -- decode byte_str even if it's already a str + """ + if not isinstance ( byte_str, str ): + if not charwise_only and encodings: + ret = None + if not isinstance ( encodings, str ): + try_enc = encodings + else: + try_enc = ( encodings, ) + + for enc in try_enc: + try: + ret = byte_str.decode ( enc ) + break + except: + ret = None + + if ret is not None: + return ret + + ret = "" + for c in byte_str: + ret += chr ( c ) + return ret + else: + return byte_str +# --- end of bytes_try_decode() ---